Spaces:
Sleeping
Sleeping
File size: 3,287 Bytes
ee1587c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
import shutil
import subprocess
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import FileResponse, JSONResponse
from fastapi.responses import HTMLResponse
from pydub import AudioSegment
import shutil
import tempfile
import speech_recognition as sr
import os
# Shared speech recognizer instance; calling_asr uses it to read and
# transcribe audio.
r = sr.Recognizer()
# FastAPI application object on which the route handlers in this module are
# registered via the @app.get / @app.post decorators.
app = FastAPI()
def resample_audio(input_path, output_path, target_sample_rate):
    """Resample an audio file to ``target_sample_rate`` Hz using ffmpeg.

    Args:
        input_path: Path of the audio file ffmpeg reads.
        output_path: Path ffmpeg writes the resampled audio to. An existing
            file at this path is overwritten.
        target_sample_rate: Desired sample rate in Hz (passed to ``-ar``).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status. The captured stderr is attached to the exception.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    ffmpeg_cmd = [
        "ffmpeg",
        # Overwrite without asking; otherwise ffmpeg waits for a y/N answer
        # on stdin when the output file already exists.
        "-y",
        "-i", input_path,
        "-ar", str(target_sample_rate),
        output_path,
    ]
    subprocess.run(
        ffmpeg_cmd,
        # Never let ffmpeg read from the server's stdin.
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Surface conversion failures instead of silently returning.
        check=True,
    )
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Return the contents of ``soundscripter.html`` as the index page."""
    # The front-end markup lives in a file next to the working directory.
    with open("soundscripter.html", "r") as page:
        return page.read()
def get_sampling_rate(audio_file_path):
    """Return the frame rate pydub reports for the audio file at the path."""
    return AudioSegment.from_file(audio_file_path).frame_rate
@app.post("/process_audio")
async def process_audio(audio: UploadFile = File(...), language: str = Form(...)):
    """Transcribe an uploaded WAV file and return the recognised text as JSON.

    The upload is stored in a private temporary directory, resampled to
    16000 Hz via ``resample_audio`` and passed to ``calling_asr``. The
    directory and everything written into it is removed before returning.

    Args:
        audio: Uploaded audio; its content type must be ``audio/wav``.
        language: Required form field. NOTE(review): it is validated but not
            used for recognition, which is hard-coded to ``"hi-IN"`` --
            confirm whether that is intended.

    Returns:
        JSONResponse: ``{"success": True, "language": <transcript>}`` on
        success (the ``"language"`` key carries the transcript text).
        Otherwise ``{"success": False, ...}`` with status 400 for a rejected
        request or 500 when processing fails.
    """
    if not audio or not language:
        return JSONResponse(content={"success": False}, status_code=400)

    # Only uploads declared as WAV are accepted.
    if audio.content_type != "audio/wav":
        return JSONResponse(
            content={"success": False, "message": "Audio must be in WAV format."},
            status_code=400,
        )

    try:
        # A private directory replaces the race-prone tempfile.mktemp() and
        # guarantees that every intermediate file is deleted, including any
        # file created alongside the resampled WAV.
        with tempfile.TemporaryDirectory() as work_dir:
            temp_file_path = os.path.join(work_dir, "upload.wav")
            output_path = os.path.join(work_dir, "resampled.wav")

            # Close the file before it is read again so all bytes are flushed.
            with open(temp_file_path, "wb") as temp_file:
                shutil.copyfileobj(audio.file, temp_file)
            # Print the file path for debugging
            print(temp_file_path)

            # Resample the audio to 16000 Hz
            resample_audio(temp_file_path, output_path, target_sample_rate=16000)
            print(output_path)

            # Verify the resampled file; a mismatch is reported as an error
            # rather than retried.
            sampling_rate = get_sampling_rate(output_path)
            if sampling_rate != 16000:
                return JSONResponse(
                    content={"success": False, "message": "Sample rate is not 16000Hz."},
                    status_code=500,
                )

            # Must run while the temporary directory still exists.
            transcript = calling_asr(output_path, "hi-IN")
    except Exception as e:
        print("Error processing audio:", e)
        return JSONResponse(
            content={"success": False, "message": "Error processing audio."},
            status_code=500,
        )

    return JSONResponse(content={"success": True, "language": transcript})
def calling_asr(wav_file, lid):
    """Transcribe a WAV file using the recognizer's Google backend.

    Args:
        wav_file: Path of the audio file to transcribe.
        lid: Language code passed to ``recognize_google`` as ``language``.

    Returns:
        The recognised text, or the string ``"cant read wav file"`` when the
        file cannot be read or recognition fails.
    """
    # An empty "<wav_file>.txt" has always been created here, although nothing
    # is written to it. Keep creating it for backward compatibility, but close
    # the handle instead of leaking it.
    with open(wav_file + ".txt", "w"):
        pass

    try:
        with sr.AudioFile(wav_file) as source:
            audio = r.record(source)
        return r.recognize_google(audio, language=lid)
    except Exception as exc:
        # Best-effort: report the cause and fall back to the placeholder
        # text, while letting KeyboardInterrupt/SystemExit propagate.
        print("Error transcribing audio:", exc)
        return "cant read wav file"
|