wt002 committed on
Commit
b403954
·
verified ·
1 Parent(s): b52644e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +328 -167
app.py CHANGED
@@ -1,209 +1,370 @@
1
  import os
2
- import gradio as gr
3
  import requests
4
  import pandas as pd
5
- from smolagents import CodeAgent, OpenAIServerModel, DuckDuckGoSearchTool, VisitWebpageTool, tool, \
6
- FinalAnswerTool, PythonInterpreterTool, SpeechToTextTool, ToolCallingAgent
7
- import yaml
8
- import importlib
9
- from io import BytesIO
10
- import tempfile
11
  import base64
12
- from youtube_transcript_api import YouTubeTranscriptApi
13
- from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
14
- from urllib.parse import urlparse, parse_qs
15
- import json
16
- import whisper
17
- import re
18
-
19
 
 
 
20
 
21
- # (Keep Constants as is)
22
  # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
 
 
 
 
 
 
 
 
 
 
25
 
26
@tool
def transcribe_audio_file(file_path: str) -> str:
    """
    Transcribe a local MP3 audio file with Whisper.

    Args:
        file_path: Full path to the .mp3 audio file.

    Returns:
        A JSON-formatted string. On success:
        {"success": true, "transcript": [{"start": 0.0, "end": 5.2, "text": "..."}, ...]}
        On failure:
        {"success": false, "error": "<reason why transcription failed>"}
    """
    def _failure(reason: str) -> str:
        # Uniform JSON error envelope shared by all failure paths.
        return json.dumps({"success": False, "error": reason})

    try:
        if not os.path.exists(file_path):
            return _failure("File does not exist.")
        if not file_path.lower().endswith(".mp3"):
            return _failure("Invalid file type. Only MP3 files are supported.")

        # 'base' trades accuracy for speed; 'tiny'/'small'/'medium'/'large' also work.
        asr_model = whisper.load_model("base")
        outcome = asr_model.transcribe(file_path, verbose=False, word_timestamps=False)

        segments = []
        for seg in outcome["segments"]:
            segments.append({
                "start": seg["start"],
                "end": seg["end"],
                "text": seg["text"].strip(),
            })

        return json.dumps({"success": True, "transcript": segments})
    except Exception as e:
        return _failure(str(e))
72
@tool
def get_youtube_transcript(video_url: str) -> str:
    """
    Retrieve the English transcript of a YouTube video, including timestamps.

    Automatically generated subtitles are supported. Each snippet carries its
    start time, duration, and text.

    Args:
        video_url: The full URL of the YouTube video (e.g., https://www.youtube.com/watch?v=12345)

    Returns:
        A JSON-formatted string. On success:
        {"success": true, "transcript": [{"start": 0.0, "duration": 1.54, "text": "..."}, ...]}
        On failure:
        {"success": false, "error": "<reason why the transcript could not be retrieved>"}
    """
    try:
        # A watch URL carries the video id in its "v" query parameter.
        query_params = parse_qs(urlparse(video_url).query)
        video_id = query_params.get("v", [None])[0]
        if not video_id:
            return json.dumps({"success": False, "error": "Invalid YouTube URL. Could not extract video ID."})

        snippets = YouTubeTranscriptApi().fetch(video_id)
        payload = []
        for snippet in snippets:
            payload.append({
                "start": snippet.start,
                "duration": snippet.duration,
                "text": snippet.text,
            })

        return json.dumps({"success": True, "transcript": payload})

    except VideoUnavailable:
        return json.dumps({"success": False, "error": "The video is unavailable."})
    except TranscriptsDisabled:
        return json.dumps({"success": False, "error": "Transcripts are disabled for this video."})
    except NoTranscriptFound:
        return json.dumps({"success": False, "error": "No transcript found for this video."})
    except Exception as e:
        return json.dumps({"success": False, "error": str(e)})
 
126
# --- Basic Agent Definition ---
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
class BasicAgent:
    """Wrapper around a smolagents CodeAgent with search, web, and audio tools."""

    def __init__(self):
        model = OpenAIServerModel(api_key=os.environ.get("OPENAI_API_KEY"), model_id="gpt-4o")

        self.code_agent = CodeAgent(
            tools=[PythonInterpreterTool(), DuckDuckGoSearchTool(), VisitWebpageTool(), transcribe_audio_file,
                   get_youtube_transcript,
                   FinalAnswerTool()],
            model=model,
            max_steps=20,
            name="hf_agent_course_final_assignment_solver",
            prompt_templates=yaml.safe_load(
                importlib.resources.files("prompts").joinpath("code_agent.yaml").read_text()
            )
        )
        print("BasicAgent initialized.")

    def __call__(self, task_id: str, question: str, file_name: str) -> str:
        """Run the agent on a question, optionally enriched with an attached file."""
        if file_name:
            question = self.enrich_question_with_associated_file_details(task_id, question, file_name)

        final_result = self.code_agent.run(question)

        # Extract text after "FINAL ANSWER:" (case-insensitive, trims whitespace).
        match = re.search(r'final answer:\s*(.*)', str(final_result), re.IGNORECASE | re.DOTALL)
        if match:
            return match.group(1).strip()

        # Fallback in case the pattern is not found.
        return str(final_result).strip()

    def enrich_question_with_associated_file_details(self, task_id: str, question: str, file_name: str) -> str:
        """Download the task's attached file and inline a usable representation.

        BUGFIX: this method was previously defined twice (identical copies,
        the second silently shadowing the first) and returned None for any
        unsupported extension, destroying the question. The duplicate is
        removed and unknown file types now fall through to the unmodified
        question.
        """
        api_url = DEFAULT_API_URL
        get_associated_files_url = f"{api_url}/files/{task_id}"
        response = requests.get(get_associated_files_url, timeout=15)
        response.raise_for_status()

        if file_name.endswith(".mp3"):
            # Persist to disk so the transcription tool can read it by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                tmp_file.write(response.content)
                file_path = tmp_file.name
            return question + "\n\nMentioned .mp3 file local path is: " + file_path
        elif file_name.endswith(".py"):
            file_content = response.text
            return question + "\n\nBelow is mentioned Python file:\n\n```python\n" + file_content + "\n```\n"
        elif file_name.endswith(".xlsx"):
            xlsx_io = BytesIO(response.content)
            df = pd.read_excel(xlsx_io)
            file_content = df.to_csv(index=False)
            return question + "\n\nBelow is mentioned excel file in CSV format:\n\n```csv\n" + file_content + "\n```\n"
        elif file_name.endswith(".png"):
            base64_str = base64.b64encode(response.content).decode('utf-8')
            return question + "\n\nBelow is the .png image in base64 format:\n\n```base64\n" + base64_str + "\n```\n"

        # Unsupported extension: keep the original question instead of returning None.
        return question
209
  def run_and_submit_all( profile: gr.OAuthProfile | None):
 
1
  import os
2
+ from dotenv import load_dotenv
3
  import requests
4
  import pandas as pd
 
 
 
 
 
 
5
  import base64
6
+ import mimetypes
7
+ import tempfile
8
+ from smolagents import CodeAgent, OpenAIServerModel, tool
9
+ from dotenv import load_dotenv
10
+ from openai import OpenAI
 
 
11
 
12
# Load environment variables from a local .env file (no-op if the file is absent).
load_dotenv()

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Initialize the OpenAI model.
# BUGFIX(consistency): the key was previously read only from the non-standard
# "openai" variable while the client below used "OPENAI_API_KEY"; accept either
# (old name first, for backward compatibility) so the two stay in sync.
model = OpenAIServerModel(
    model_id="o4-mini-2025-04-16",
    api_base="https://api.openai.com/v1",
    api_key=os.getenv("openai") or os.getenv("OPENAI_API_KEY"),
)

# Initialize the OpenAI client (used by the audio-transcription tool).
openAiClient = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
28
@tool
def tavily_search(query: str) -> str:
    """
    Perform a web search using the Tavily API.

    Args:
        query: The search query string

    Returns:
        A string containing the search results: Tavily's synthesized answer
        (when available) followed by a numbered source list, or an error message.
    """
    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        return "Error: TAVILY_API_KEY environment variable is not set"

    api_url = "https://api.tavily.com/search"

    headers = {
        "Content-Type": "application/json",
    }

    payload = {
        "api_key": api_key,
        "query": query,
        "search_depth": "advanced",
        "include_answer": True,       # ask Tavily to synthesize a direct answer
        "include_raw_content": False,
        "max_results": 5
    }

    try:
        # BUGFIX: a missing timeout could hang the agent indefinitely on a
        # stalled connection.
        response = requests.post(api_url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        data = response.json()

        # Extract the answer and results.
        result = []
        if "answer" in data:
            result.append(f"Answer: {data['answer']}")

        if "results" in data:
            result.append("\nSources:")
            for i, item in enumerate(data["results"], 1):
                result.append(f"{i}. {item.get('title', 'No title')}: {item.get('url', 'No URL')}")

        return "\n".join(result)
    except Exception as e:
        return f"Error performing Tavily search: {str(e)}"
77
@tool
def analyze_image(image_url: str) -> str:
    """
    Analyze an image using OpenAI's vision model and return a description.

    Args:
        image_url: URL of the image to analyze

    Returns:
        A detailed description of the image, or an error message
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        return "Error: OpenAI API key not set in environment variables"

    # Download the image and inline it as base64 (avoids requiring a public URL).
    try:
        # BUGFIX: added timeouts so a stalled download/API call cannot hang the agent.
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        image_data = response.content
        base64_image = base64.b64encode(image_data).decode('utf-8')
    except Exception as e:
        return f"Error downloading image: {str(e)}"

    # Call the OpenAI chat-completions API with an image part.
    api_url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4.1-2025-04-14",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in detail. Include any text, objects, people, actions, and overall context."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # NOTE(review): MIME is hard-coded to jpeg regardless of
                            # the actual image type; the API tolerates this in practice.
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 500
    }

    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        data = response.json()

        if "choices" in data and len(data["choices"]) > 0:
            return data["choices"][0]["message"]["content"]
        else:
            return "No description generated"
    except Exception as e:
        return f"Error analyzing image: {str(e)}"
142
@tool
def analyze_sound(audio_url: str) -> str:
    """
    Transcribe an audio file using OpenAI's transcription endpoint.

    Args:
        audio_url: the url of the audio

    Returns:
        A transcription of the audio content, or an error message
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        return "Error: OpenAI API key not set in environment variables"

    # Download the audio to a temp path the OpenAI SDK can stream from.
    try:
        # BUGFIX: added a timeout so a stalled download cannot hang the agent.
        response = requests.get(audio_url, timeout=30)
        response.raise_for_status()

        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
            temp_file.write(response.content)
            temp_file_path = temp_file.name
    except Exception as e:
        return f"Error downloading audio: {str(e)}"

    try:
        # BUGFIX: the file handle was previously opened and never closed, and
        # the temp file was never deleted — both leaked on every call.
        with open(temp_file_path, "rb") as audio_file:
            transcription = openAiClient.audio.transcriptions.create(
                model="gpt-4o-transcribe",
                file=audio_file
            )
        return transcription.text
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
    finally:
        try:
            os.unlink(temp_file_path)
        except OSError:
            pass  # best-effort cleanup; never mask the real result
181
@tool
def analyze_excel(excel_url: str) -> str:
    """
    Process an Excel file and convert it to a text-based format.

    Args:
        excel_url: URL of the Excel file to analyze

    Returns:
        A text representation of the Excel data (dimensions, column names,
        numeric summary, and the first five rows), or an error message
    """
    temp_file_path = None
    try:
        # Download the Excel file.
        # BUGFIX: added a timeout so a stalled download cannot hang the agent.
        response = requests.get(excel_url, timeout=30)
        response.raise_for_status()

        # Save to a temporary file so pandas can read it by path.
        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
            temp_file.write(response.content)
            temp_file_path = temp_file.name

        # Read the Excel file.
        df = pd.read_excel(temp_file_path)

        # Convert to a text representation.
        result = []

        # Sheet dimensions.
        result.append(f"Excel file with {len(df)} rows and {len(df.columns)} columns")

        # Column names.
        result.append("\nColumns:")
        for i, col in enumerate(df.columns, 1):
            result.append(f"{i}. {col}")

        # Numeric summary.
        result.append("\nData Summary:")
        result.append(df.describe().to_string())

        # First few rows as a sample.
        result.append("\nFirst 5 rows:")
        result.append(df.head().to_string())

        return "\n".join(result)
    except Exception as e:
        return f"Error processing Excel file: {str(e)}"
    finally:
        # BUGFIX: the temp file was previously removed only on the success
        # path, leaking a file on every exception.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError:
                pass
231
@tool
def analyze_text(text_url: str) -> str:
    """
    Process a text file and return its contents.

    Args:
        text_url: URL of the text file to analyze

    Returns:
        The contents of the text file (truncated past 10,000 characters),
        or an error message
    """
    try:
        # Download the text file.
        # BUGFIX: added a timeout so a stalled download cannot hang the agent.
        response = requests.get(text_url, timeout=30)
        response.raise_for_status()

        # Get the text content.
        text_content = response.text

        # For very long files, truncate with a note to keep the agent context small.
        if len(text_content) > 10000:
            return f"Text file content (truncated to first 10000 characters):\n\n{text_content[:10000]}\n\n... [content truncated]"

        return f"Text file content:\n\n{text_content}"
    except Exception as e:
        return f"Error processing text file: {str(e)}"
258
@tool
def transcribe_youtube(youtube_url: str) -> str:
    """
    Extract the transcript from a YouTube video.

    Args:
        youtube_url: URL of the YouTube video

    Returns:
        The transcript of the video, or an error message
    """
    try:
        # Extract the video ID: an 11-character id follows "v=" or a path "/".
        import re
        video_id_match = re.search(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*', youtube_url)
        if not video_id_match:
            return "Error: Invalid YouTube URL"

        video_id = video_id_match.group(1)

        # Use youtube_transcript_api to get the transcript.
        from youtube_transcript_api import YouTubeTranscriptApi

        try:
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)

            # BUGFIX(idiom): replaced a quadratic `+=` concatenation loop with
            # an equivalent join (same output: segments separated by single
            # spaces, trailing whitespace stripped).
            full_transcript = " ".join(segment['text'] for segment in transcript_list).strip()

            return f"YouTube Video Transcript:\n\n{full_transcript}"
        except Exception as e:
            return f"Error extracting transcript: {str(e)}"
    except Exception as e:
        return f"Error processing YouTube video: {str(e)}"
295
@tool
def process_file(task_id: str, file_name: str) -> str:
    """
    Fetch and process a file based on task_id and file_name.
    For images, it will analyze them and return a description of the image.
    For audio files, it will transcribe them.
    For Excel files, it will convert them to a text format.
    For text files, it will return the file contents.
    Other file types can be ignored for this tool.

    Args:
        task_id: The task ID to fetch the file for
        file_name: The name of the file to process

    Returns:
        A description or transcription of the file content
    """
    if not task_id or not file_name:
        return "Error: task_id and file_name are required"

    # The scoring service serves a task's attachment at /files/<task_id>.
    file_url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        # Probe the URL first so failures surface here; the specialized tools
        # below re-download the file themselves.
        # BUGFIX: added a timeout so a stalled download cannot hang the agent.
        response = requests.get(file_url, timeout=30)
        response.raise_for_status()

        # The MIME type is guessed from the file NAME — the URL itself carries
        # no extension.
        mime_type, _ = mimetypes.guess_type(file_name)

        # Dispatch to the specialized tool for this file type.
        if mime_type and mime_type.startswith('image/'):
            return analyze_image(file_url)
        elif file_name.lower().endswith('.mp3') or (mime_type and mime_type.startswith('audio/')):
            return analyze_sound(file_url)
        elif file_name.lower().endswith('.xlsx') or (mime_type and mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'):
            return analyze_excel(file_url)
        elif file_name.lower().endswith(('.txt', '.py', '.js', '.html', '.css', '.json', '.md')) or (mime_type and mime_type.startswith('text/')):
            return analyze_text(file_url)
        else:
            # Unknown type: report the successful fetch and guessed type only.
            return f"File '{file_name}' of type '{mime_type or 'unknown'}' was fetched successfully. Content processing not implemented for this file type."
    except Exception as e:
        return f"Error processing file: {str(e)}"
 
 
 
345
 
346
class BasicAgent:
    """
    A simple agent built on smolagents.CodeAgent with specialized tools:

    - tavily_search: web searches via the Tavily API
    - analyze_image: image description
    - analyze_sound: audio transcription
    - analyze_excel: spreadsheet summarization
    - analyze_text: code/text file contents
    - transcribe_youtube: video transcripts
    - process_file: dispatch over attached task files

    A single CodeAgent instance is created in __init__ and reused for every
    question to reduce per-call overhead.
    """

    def __init__(self):
        print("BasicAgent initialized.")
        # Reuse one CodeAgent instance for all queries.
        toolbox = [
            tavily_search,
            analyze_image,
            analyze_sound,
            analyze_excel,
            analyze_text,
            transcribe_youtube,
            process_file,
        ]
        self.agent = CodeAgent(tools=toolbox, model=model)

    def __call__(self, question: str) -> str:
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        return self.agent.run(question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
 
370
  def run_and_submit_all( profile: gr.OAuthProfile | None):