stts

Runtime error

App Files Files Community

Edmond7 committed 25 days ago

Commit

ab17173

•

1 Parent(s): 9cf4194

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -10

app.py CHANGED Viewed

@@ -3,7 +3,9 @@ import math
 import time
 import base64
 import os
-from typing import Dict, Any
 from functools import wraps
 from fastapi import FastAPI, Depends, HTTPException, File, UploadFile, Form, Header
@@ -74,6 +76,42 @@ def check_api_key(x_api_key: str = Header(...)):
         raise HTTPException(status_code=401, detail="Invalid or missing API key")
     return x_api_key
 @app.post("/transcribe_audio_file")
 @timeit
 async def transcribe_audio_file(
@@ -86,15 +124,29 @@ async def transcribe_audio_file(
     logger.debug(f"Received parameters - task: {task}, return_timestamps: {return_timestamps}")
     try:
-        audio_data = await file.read()
-        file_size = len(audio_data)
         file_size_mb = file_size / (1024 * 1024)
-        logger.debug(f"Audio file size: {file_size} bytes ({file_size_mb:.2f}MB)")
-    except Exception as e:
-        logger.error(f"Error reading audio file: {str(e)}", exc_info=True)
-        raise HTTPException(status_code=400, detail=f"Error reading audio file: {str(e)}")
-    return await process_audio(audio_data, file_size_mb, task, return_timestamps)
 @app.post("/transcribe_audio_base64")
 @timeit
@@ -223,5 +275,4 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
         return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
     else:
         # we have a malformed timestamp so just return it as is
-        return seconds

 import time
 import base64
 import os
+import subprocess
+import tempfile
+from typing import Dict, Any, Union, Tuple
 from functools import wraps
 from fastapi import FastAPI, Depends, HTTPException, File, UploadFile, Form, Header
         raise HTTPException(status_code=401, detail="Invalid or missing API key")
     return x_api_key
def extract_audio_from_video(video_data: bytes) -> bytes:
    """Extract the audio track from an in-memory video using ffmpeg.

    The video bytes are written into a scratch directory, ffmpeg transcodes
    the audio to 16 kHz mono 16-bit PCM WAV, and the resulting file is read
    back into memory. The scratch directory is removed on exit.

    Args:
        video_data: Raw bytes of the uploaded video file.

    Returns:
        The bytes of the WAV file written by ffmpeg.

    Raises:
        HTTPException: With status 400 when ffmpeg exits with a non-zero code.
    """
    # A scratch directory replaces the two open NamedTemporaryFile handles:
    # ffmpeg has to open both paths itself, and reopening a still-open
    # NamedTemporaryFile by name is not possible on every platform.
    with tempfile.TemporaryDirectory() as scratch_dir:
        video_path = os.path.join(scratch_dir, 'input.mp4')
        audio_path = os.path.join(scratch_dir, 'output.wav')

        # Closing the handle guarantees the data is on disk before ffmpeg runs.
        with open(video_path, 'wb') as video_file:
            video_file.write(video_data)

        command = [
            'ffmpeg',
            '-i', video_path,
            '-vn',  # Disable video
            '-acodec', 'pcm_s16le',  # Convert to PCM WAV
            '-ar', '16000',  # Set sample rate to 16kHz
            '-ac', '1',  # Convert to mono
            '-y',  # Overwrite output file
            audio_path,
        ]
        try:
            subprocess.run(command, check=True, capture_output=True)
        except subprocess.CalledProcessError as e:
            # ffmpeg's stderr is not guaranteed to be valid UTF-8; a strict
            # decode here could raise and hide the original failure.
            stderr_text = e.stderr.decode(errors='replace') if e.stderr else str(e)
            logger.error(f"FFmpeg error: {stderr_text}")
            raise HTTPException(
                status_code=400,
                detail="Error extracting audio from video file. Make sure it's a valid video file."
            ) from e

        # Read the extracted audio
        with open(audio_path, 'rb') as audio_file:
            return audio_file.read()
def is_video_file(file_name: str) -> bool:
    """Check if the file is a video based on its extension.

    The comparison is case-insensitive and only looks at the end of the name.

    Args:
        file_name: Name of the uploaded file. A missing (``None``) or empty
            name is treated as "not a video" instead of raising.

    Returns:
        True when the name ends with one of the known video extensions.
    """
    # Upload file names can be absent; guard before calling str methods.
    if not file_name:
        return False
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv')
    # str.endswith accepts a tuple, so one call covers every extension.
    return file_name.lower().endswith(video_extensions)
 @app.post("/transcribe_audio_file")
 @timeit
 async def transcribe_audio_file(
     logger.debug(f"Received parameters - task: {task}, return_timestamps: {return_timestamps}")
     try:
+        file_data = await file.read()
+        file_size = len(file_data)
         file_size_mb = file_size / (1024 * 1024)
+        logger.debug(f"File size: {file_size} bytes ({file_size_mb:.2f}MB)")
+        # Check if the file is a video and extract audio if needed
+        if is_video_file(file.filename):
+            logger.debug("Processing video file")
+            try:
+                file_data = extract_audio_from_video(file_data)
+                logger.debug("Successfully extracted audio from video")
+            except Exception as e:
+                logger.error(f"Error processing video file: {str(e)}", exc_info=True)
+                raise HTTPException(
+                    status_code=500,
+                    detail=f"Error processing video file: {str(e)}"
+                )
+        return await process_audio(file_data, file_size_mb, task, return_timestamps)
+    except Exception as e:
+        logger.error(f"Error reading file: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=400, detail=f"Error reading file: {str(e)}")
 @app.post("/transcribe_audio_base64")
 @timeit
         return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
     else:
         # we have a malformed timestamp so just return it as is
+        return seconds