"""FastAPI service that transcribes an uploaded WAV file.

The upload is resampled to 16 kHz with ffmpeg and then passed to the
``speech_recognition`` library for transcription.
"""

import os
import shutil
import subprocess
import tempfile

import speech_recognition as sr
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
from pydub import AudioSegment

# Sample rate (Hz) every upload is converted to before recognition.
TARGET_SAMPLE_RATE = 16000

# Language code handed to the recogniser.
# NOTE(review): the ``language`` form field of /process_audio is validated
# but not used for recognition; this value has always been hard-coded.
# Confirm what the front end sends before wiring the field through.
ASR_LANGUAGE = "hi-IN"

r = sr.Recognizer()
app = FastAPI()


def resample_audio(input_path: str, output_path: str, target_sample_rate: int) -> None:
    """Convert *input_path* to *output_path* at *target_sample_rate* using ffmpeg.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path ffmpeg writes the converted audio to.
        target_sample_rate: Value passed to ffmpeg's ``-ar`` option.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    ffmpeg_cmd = [
        "ffmpeg",
        "-i", input_path,
        "-ar", str(target_sample_rate),
        output_path,
    ]
    # check=True surfaces a failed conversion here instead of leaving the
    # caller to trip over a missing output file later.
    subprocess.run(
        ffmpeg_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )


@app.get("/", response_class=HTMLResponse)
async def read_root() -> str:
    """Serve the front-end page stored in ``soundscripter.html``."""
    # The path is relative, so it resolves against the process's working directory.
    with open("soundscripter.html", "r") as file:
        html_content = file.read()
    return html_content


def get_sampling_rate(audio_file_path: str) -> int:
    """Return the frame rate pydub reports for *audio_file_path*."""
    audio = AudioSegment.from_file(audio_file_path)
    return audio.frame_rate


@app.post("/process_audio")
async def process_audio(audio: UploadFile = File(...), language: str = Form(...)):
    """Resample an uploaded WAV file to 16 kHz and return its transcription.

    Args:
        audio: Uploaded file; its content type must be ``audio/wav``.
        language: Required form field. It must be non-empty but is not
            otherwise used (see the note on ``ASR_LANGUAGE``).

    Returns:
        A ``JSONResponse``:
        * 400 when a field is empty or the upload is not ``audio/wav``;
        * 500 when saving, resampling or inspecting the audio fails;
        * 200 with ``{"success": True, "language": <text>}`` otherwise, where
          the ``"language"`` key carries the text returned by ``calling_asr``.
    """
    if not audio or not language:
        return JSONResponse(content={"success": False}, status_code=400)

    # Check if the uploaded file is in WAV format
    if audio.content_type != "audio/wav":
        return JSONResponse(
            content={"success": False, "message": "Audio must be in WAV format."},
            status_code=400,
        )

    try:
        # Keep every intermediate file in a private directory that is removed
        # on exit, whatever happens. This replaces tempfile.mktemp (deprecated
        # and race-prone) and stops temp files accumulating per request.
        with tempfile.TemporaryDirectory() as work_dir:
            upload_path = os.path.join(work_dir, "upload.wav")
            resampled_path = os.path.join(work_dir, "resampled.wav")

            # Save the received audio to a temporary file
            with open(upload_path, "wb") as upload_file:
                shutil.copyfileobj(audio.file, upload_file)

            # Print the file path for debugging
            print(upload_path)

            # Resample the audio to 16000 Hz
            resample_audio(
                upload_path, resampled_path, target_sample_rate=TARGET_SAMPLE_RATE
            )
            print(resampled_path)

            # Confirm the conversion produced the expected rate
            sampling_rate = get_sampling_rate(resampled_path)
            if sampling_rate != TARGET_SAMPLE_RATE:
                return JSONResponse(
                    content={"success": False, "message": "Sample rate is not 16000Hz."},
                    status_code=500,
                )

            # Recognition must run while the resampled file still exists.
            transcript = calling_asr(resampled_path, ASR_LANGUAGE)
    except Exception as e:
        print("Error processing audio:", e)
        return JSONResponse(
            content={"success": False, "message": "Error processing audio."},
            status_code=500,
        )

    return JSONResponse(content={"success": True, "language": transcript})


def calling_asr(wav_file: str, lid: str) -> str:
    """Transcribe *wav_file* with the module-level recogniser.

    Args:
        wav_file: Path of the audio file to read.
        lid: Language code passed to ``recognize_google`` as ``language``.

    Returns:
        The recognised text, or ``"cant read wav file"`` when reading or
        recognition fails.
    """
    # The previous version opened ``wav_file + ".txt"`` for writing, never
    # wrote to it and never closed it; that leaked handle (and the empty file
    # it left behind) has been removed.
    text = "cant read wav file"
    try:
        with sr.AudioFile(wav_file) as source:
            audio = r.record(source)
        text = r.recognize_google(audio, language=lid)
    except Exception as e:
        # Best effort: keep returning the fallback text, but report the cause
        # and let KeyboardInterrupt/SystemExit propagate.
        print("Error recognising audio:", e)
    return text