Edmond7 committed on
Commit
ab17173
1 Parent(s): 9cf4194

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -10
app.py CHANGED
@@ -3,7 +3,9 @@ import math
3
  import time
4
  import base64
5
  import os
6
- from typing import Dict, Any
 
 
7
  from functools import wraps
8
 
9
  from fastapi import FastAPI, Depends, HTTPException, File, UploadFile, Form, Header
@@ -74,6 +76,42 @@ def check_api_key(x_api_key: str = Header(...)):
74
  raise HTTPException(status_code=401, detail="Invalid or missing API key")
75
  return x_api_key
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  @app.post("/transcribe_audio_file")
78
  @timeit
79
  async def transcribe_audio_file(
@@ -86,15 +124,29 @@ async def transcribe_audio_file(
86
  logger.debug(f"Received parameters - task: {task}, return_timestamps: {return_timestamps}")
87
 
88
  try:
89
- audio_data = await file.read()
90
- file_size = len(audio_data)
91
  file_size_mb = file_size / (1024 * 1024)
92
- logger.debug(f"Audio file size: {file_size} bytes ({file_size_mb:.2f}MB)")
93
- except Exception as e:
94
- logger.error(f"Error reading audio file: {str(e)}", exc_info=True)
95
- raise HTTPException(status_code=400, detail=f"Error reading audio file: {str(e)}")
96
 
97
- return await process_audio(audio_data, file_size_mb, task, return_timestamps)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  @app.post("/transcribe_audio_base64")
100
  @timeit
@@ -223,5 +275,4 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
223
  return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
224
  else:
225
  # we have a malformed timestamp so just return it as is
226
- return seconds
227
-
 
3
  import time
4
  import base64
5
  import os
6
+ import subprocess
7
+ import tempfile
8
+ from typing import Dict, Any, Union, Tuple
9
  from functools import wraps
10
 
11
  from fastapi import FastAPI, Depends, HTTPException, File, UploadFile, Form, Header
 
76
  raise HTTPException(status_code=401, detail="Invalid or missing API key")
77
  return x_api_key
78
 
79
def extract_audio_from_video(video_data: bytes) -> bytes:
    """Extract the audio track of a video as 16 kHz mono 16-bit PCM WAV bytes.

    The video bytes are written into a scratch directory, ``ffmpeg`` is run
    on that file, and the WAV file it produces is read back. The scratch
    directory (and both files) is removed when the function exits, whether
    or not the extraction succeeded.

    Args:
        video_data: Raw bytes of the uploaded video file.

    Returns:
        The bytes of the WAV file written by ffmpeg.

    Raises:
        HTTPException: Status 400 when ffmpeg exits with a non-zero status.
        OSError: Propagated from ``subprocess.run`` when the ``ffmpeg``
            executable cannot be started (e.g. it is not installed).
    """
    # Plain paths inside a TemporaryDirectory are used instead of open
    # NamedTemporaryFile handles: whether a named temporary file can be
    # opened a second time (here, by ffmpeg) while it is still open is
    # platform-dependent, and the output is only read after ffmpeg has
    # finished writing and closed it.
    with tempfile.TemporaryDirectory() as work_dir:
        # NOTE(review): the ".mp4" name is used for every container type;
        # presumably ffmpeg identifies the format from the content - confirm.
        video_path = os.path.join(work_dir, "input.mp4")
        audio_path = os.path.join(work_dir, "output.wav")

        with open(video_path, "wb") as video_file:
            video_file.write(video_data)

        try:
            subprocess.run(
                [
                    'ffmpeg',
                    '-i', video_path,
                    '-vn',  # Disable video
                    '-acodec', 'pcm_s16le',  # Convert to PCM WAV
                    '-ar', '16000',  # Set sample rate to 16kHz
                    '-ac', '1',  # Convert to mono
                    '-y',  # Overwrite output file
                    audio_path,
                ],
                check=True,
                capture_output=True,
            )
        except subprocess.CalledProcessError as e:
            # errors="replace" keeps a non-UTF-8 byte in ffmpeg's output from
            # raising UnicodeDecodeError and hiding the real failure.
            stderr_text = e.stderr.decode(errors="replace") if e.stderr else str(e)
            logger.error(f"FFmpeg error: {stderr_text}")
            raise HTTPException(
                status_code=400,
                detail="Error extracting audio from video file. Make sure it's a valid video file."
            ) from e

        with open(audio_path, "rb") as audio_file:
            return audio_file.read()
109
+
110
def is_video_file(file_name: Union[str, None]) -> bool:
    """Check whether a file name ends with a known video extension.

    The comparison is case-insensitive and looks only at the end of the
    name; the file content is not inspected.

    Args:
        file_name: Name of the uploaded file. May be ``None`` or empty when
            the client did not supply a file name.

    Returns:
        ``True`` if the name ends with one of the recognized video
        extensions, ``False`` otherwise (including for a missing name).
    """
    # A missing or empty name cannot identify a video; returning False here
    # avoids an AttributeError from calling .lower() on None.
    if not file_name:
        return False

    video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv')
    # str.endswith accepts a tuple, so one call checks every extension.
    return file_name.lower().endswith(video_extensions)
114
+
115
  @app.post("/transcribe_audio_file")
116
  @timeit
117
  async def transcribe_audio_file(
 
124
  logger.debug(f"Received parameters - task: {task}, return_timestamps: {return_timestamps}")
125
 
126
  try:
127
+ file_data = await file.read()
128
+ file_size = len(file_data)
129
  file_size_mb = file_size / (1024 * 1024)
130
+ logger.debug(f"File size: {file_size} bytes ({file_size_mb:.2f}MB)")
 
 
 
131
 
132
+ # Check if the file is a video and extract audio if needed
133
+ if is_video_file(file.filename):
134
+ logger.debug("Processing video file")
135
+ try:
136
+ file_data = extract_audio_from_video(file_data)
137
+ logger.debug("Successfully extracted audio from video")
138
+ except Exception as e:
139
+ logger.error(f"Error processing video file: {str(e)}", exc_info=True)
140
+ raise HTTPException(
141
+ status_code=500,
142
+ detail=f"Error processing video file: {str(e)}"
143
+ )
144
+
145
+ return await process_audio(file_data, file_size_mb, task, return_timestamps)
146
+
147
+ except Exception as e:
148
+ logger.error(f"Error reading file: {str(e)}", exc_info=True)
149
+ raise HTTPException(status_code=400, detail=f"Error reading file: {str(e)}")
150
 
151
  @app.post("/transcribe_audio_base64")
152
  @timeit
 
275
  return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
276
  else:
277
  # we have a malformed timestamp so just return it as is
278
+ return seconds