from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import os
import shutil
import uuid
import tempfile
import datetime
import time
from contextlib import contextmanager
# Import analysis functions (assumed to accept precomputed values such as the filler count and fluency score)
from fluency.fluency_api import main as analyze_fluency_main
from tone_modulation.tone_api import main as analyze_tone_main
from vcs.vcs_api import main as analyze_vcs_main
from vers.vers_api import main as analyze_vers_main
from voice_confidence_score.voice_confidence_api import main as analyze_voice_confidence_main
from vps.vps_api import main as analyze_vps_main
from ves.ves import calc_voice_engagement_score
from transcribe import transcribe_audio
from filler_count.filler_score import analyze_fillers
from emotion.emo_predict import predict_emotion
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Replace with specific domains in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
ALLOWED_EXTENSIONS = {'.wav', '.mp3', '.m4a', '.mp4', '.flac'}


@contextmanager
def temp_file_handler(upload_file: UploadFile):
    """Context manager to handle temporary file creation and cleanup."""
    temp_dir = "temp_uploads"
    os.makedirs(temp_dir, exist_ok=True)
    temp_filename = f"temp_{uuid.uuid4()}{os.path.splitext(upload_file.filename)[1]}"
    temp_filepath = os.path.join(temp_dir, temp_filename)
    try:
        with open(temp_filepath, "wb") as buffer:
            shutil.copyfileobj(upload_file.file, buffer)
        yield temp_filepath
    finally:
        if os.path.exists(temp_filepath):
            os.remove(temp_filepath)


def validate_file_extension(filename: str):
    """Validate if the file extension is allowed."""
    if os.path.splitext(filename)[1].lower() not in ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Only .wav, .mp3, .m4a, .mp4, and .flac files are supported."
        )


async def process_audio_file(upload_file: UploadFile, analysis_func, **kwargs):
    """Generic function to process an audio file with a given analysis function."""
    validate_file_extension(upload_file.filename)
    with temp_file_handler(upload_file) as temp_filepath:
        try:
            result = analysis_func(temp_filepath, **kwargs)
            return JSONResponse(content=result)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
@app.post("/analyze_fluency/")
async def analyze_fluency(file: UploadFile):
return await process_audio_file(file, analyze_fluency_main, model_size="base")
@app.post('/analyze_tone/')
async def analyze_tone(file: UploadFile):
return await process_audio_file(file, analyze_tone_main)
@app.post('/analyze_vcs/')
async def analyze_vcs(file: UploadFile):
return await process_audio_file(file, analyze_vcs_main)
@app.post('/analyze_vers/')
async def analyze_vers(file: UploadFile):
return await process_audio_file(file, analyze_vers_main)
@app.post('/voice_confidence/')
async def analyze_voice_confidence(file: UploadFile):
return await process_audio_file(file, analyze_voice_confidence_main)
@app.post('/analyze_vps/')
async def analyze_vps(file: UploadFile):
return await process_audio_file(file, analyze_vps_main)
@app.post('/voice_engagement_score/')
async def analyze_voice_engagement_score(file: UploadFile):
return await process_audio_file(file, calc_voice_engagement_score)
@app.post('/analyze_fillers/')
async def analyze_fillers_count(file: UploadFile):
return await process_audio_file(file, analyze_fillers)
@app.post('/transcribe/')
async def transcribe(file: UploadFile):
    validate_file_extension(file.filename)
    start_time = time.time()
    with temp_file_handler(file) as temp_filepath:
        try:
            transcript, language, _ = transcribe_audio(temp_filepath, model_size="base")
            end_time = time.time()
            response = {
                "transcription": transcript,
                "transcription_time": end_time - start_time,
                "language": language
            }
            return JSONResponse(content=response)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
@app.post('/analyze_all/')
async def analyze_all(file: UploadFile):
    """Endpoint to run every analysis on an uploaded audio file, transcribing it only once."""
    print(f"Received request at {datetime.datetime.now()} for file: {file.filename}")
    validate_file_extension(file.filename)
    with temp_file_handler(file) as temp_filepath:
        try:
            # Generate the transcript once so it can be returned with the combined result
            transcript, language, _ = transcribe_audio(temp_filepath, model_size="base")

            # Reuse intermediate values (filler count, fluency score) across the analyses below
            analyze_all_start = time.time()

            # Compute filler count
            filler_start = time.time()
            filler_count = analyze_fillers(temp_filepath)
            filler_count_number = filler_count.get("total_fillers", 0)
            filler_end = time.time()
            print(f"Filler analysis time: {filler_end - filler_start} seconds")

            fluency_start = time.time()
            fluency_result = analyze_fluency_main(temp_filepath, model_size="base", filler_count=filler_count_number)
            fluency_score = fluency_result['fluency_score']
            fluency_end = time.time()
            print(f"Fluency analysis time: {fluency_end - fluency_start} seconds")

            tone_start = time.time()
            tone_result = analyze_tone_main(temp_filepath)
            tone_end = time.time()
            print(f"Tone analysis time: {tone_end - tone_start} seconds")

            vcs_start = time.time()
            vcs_result = analyze_vcs_main(temp_filepath)
            vcs_end = time.time()
            print(f"VCS analysis time: {vcs_end - vcs_start} seconds")

            vers_start = time.time()
            vers_result = analyze_vers_main(temp_filepath, model_size="base", filler_count=filler_count_number)
            vers_end = time.time()
            print(f"VERS analysis time: {vers_end - vers_start} seconds")

            voice_confidence_start = time.time()
            voice_confidence_result = analyze_voice_confidence_main(temp_filepath, model_size="base", filler_count=filler_count_number, fluency_score=fluency_score)
            print("voice_confidence_result:", voice_confidence_result)
            voice_confidence_end = time.time()
            print(f"Voice confidence analysis time: {voice_confidence_end - voice_confidence_start} seconds")

            vps_start = time.time()
            vps_result = analyze_vps_main(temp_filepath)
            vps_end = time.time()
            print(f"VPS analysis time: {vps_end - vps_start} seconds")

            ves_start = time.time()
            ves_result = calc_voice_engagement_score(temp_filepath)
            ves_end = time.time()
            print(f"VES analysis time: {ves_end - ves_start} seconds")

            emotion_start = time.time()
            emotion = predict_emotion(temp_filepath)
            emotion_end = time.time()
            print(f"Emotion analysis time: {emotion_end - emotion_start} seconds")

            # Average of the seven numeric scores (key names match each module's output)
            avg_score = (
                fluency_result['fluency_score'] +
                tone_result['speech_dynamism_score'] +
                vcs_result['Voice Clarity Sore'] +
                vers_result['VERS Score'] +
                voice_confidence_result['voice_confidence_score'] +
                vps_result['VPS'] +
                ves_result['ves']
            ) / 7
            analyze_all_end = time.time()

            # Combine results
            combined_result = {
                "fluency": fluency_result,
                "tone": tone_result,
                "vcs": vcs_result,
                "vers": vers_result,
                "voice_confidence": voice_confidence_result,
                "vps": vps_result,
                "ves": ves_result,
                "filler_words": filler_count,
                "transcript": transcript,
                "Detected Language": language,
                "emotion": emotion,
                "sank_score": avg_score,
                "analysis_time": analyze_all_end - analyze_all_start,
            }
            return JSONResponse(content=combined_result)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
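

# Example usage (illustrative sketch): running the server locally with uvicorn and
# calling the combined endpoint with the `requests` library. The module path
# ("main:app"), host, port, and sample file name are assumptions, not part of the
# original project configuration.
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
#   import requests
#   with open("sample.wav", "rb") as f:
#       resp = requests.post("http://localhost:8000/analyze_all/", files={"file": f})
#   print(resp.json()["sank_score"])
#
# Alternatively, the app can be started directly:
if __name__ == "__main__":
    import uvicorn  # assumed to be installed alongside FastAPI
    uvicorn.run(app, host="0.0.0.0", port=8000)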