Commit 13e81e3 · Aditi Tewari committed
1 Parent(s): 096eb43

Update backend and frontend

Files changed:
- soundscripter.html +41 -4
- soundscripter_flaskAPI.py +68 -0
soundscripter.html
CHANGED
@@ -106,16 +106,33 @@
 
       if (recordedAudioBlob) {
         formData.append("audio", recordedAudioBlob, "recorded_audio.wav");
+
+        fetch("http://localhost:8000/asr/live", {
+          method: "POST",
+          body: formData,
+        })
+          .then((response) => response.json())
+          .then((data) => {
+            if (data.text) {
+              outputText.value = data.text;
+            } else {
+              outputText.value = "No text recognized.";
+            }
+          })
+          .catch((error) => {
+            console.error("Error while connecting with backend", error);
+            outputText.value = "Backend communication failed.";
+          });
+
+
       } else if (uploadedFile) {
         formData.append("audio", uploadedFile, uploadedFile.name);
-      }
 
-
-      fetch("/asr", {
+        fetch("http://localhost:8000/asr", {
           method: "POST",
           body: formData,
         })
-
+          .then((response) => response.json())
           .then((data) => {
             if (data.text) {
               outputText.value = data.text;
@@ -128,6 +145,26 @@
             outputText.value = "Backend communication failed.";
           });
 
+      }
+
+
+      // fetch("http://localhost:8000/asr", {
+      //   method: "POST",
+      //   body: formData,
+      // })
+      //   .then((response) => response.json())
+      //   .then((data) => {
+      //     if (data.text) {
+      //       outputText.value = data.text;
+      //     } else {
+      //       outputText.value = "No text recognized.";
+      //     }
+      //   })
+      //   .catch((error) => {
+      //     console.error("Error while connecting with backend", error);
+      //     outputText.value = "Backend communication failed.";
+      //   });
+
       if (recordedAudioBlob) {
         const downloadLink = document.createElement("a");
         downloadLink.href = URL.createObjectURL(recordedAudioBlob);
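The new frontend code posts the recorded blob (or the uploaded file) as a multipart form field named "audio" to the FastAPI backend at http://localhost:8000. The same request can be reproduced outside the browser for testing; a minimal sketch using Python's requests package (not part of this repo) against a locally running server, with an illustrative WAV file name:

    import requests

    # Mirror the browser's FormData: field name "audio", WAV content type.
    # "recorded_audio.wav" is an assumed local test file, not something the repo ships.
    with open("recorded_audio.wav", "rb") as f:
        files = {"audio": ("recorded_audio.wav", f, "audio/wav")}
        response = requests.post("http://localhost:8000/asr/live", files=files)

    # The backend answers {"text": "..."} on success, or {"success": false, ...}
    # with a 4xx/5xx status code on failure.
    print(response.status_code, response.json())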
soundscripter_flaskAPI.py
CHANGED
@@ -9,6 +9,9 @@ from fastapi.responses import FileResponse, JSONResponse
 from fastapi.responses import HTMLResponse
 from pydub import AudioSegment
 import librosa
+import tempfile
+import shutil
+import subprocess
 from fastapi.middleware.cors import CORSMiddleware
 
 
@@ -33,6 +36,25 @@ async def read_root():
     # return HTMLResponse(content=html_content)
     return html_content
 
+def resample_audio(input_path, output_path, target_sample_rate):
+    ffmpeg_cmd = [
+        "ffmpeg",
+        "-i", input_path,
+        "-ar", str(target_sample_rate),
+        output_path
+    ]
+    subprocess.run(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+def recognize_speech_live(file_path, language="hi-IN"):
+    with sr.AudioFile(file_path) as source:
+        audio = recognizer.record(source)
+    try:
+        text = recognizer.recognize_google(audio, language=language)
+        return text
+    except sr.UnknownValueError:
+        return "Speech not recognized."
+    except sr.RequestError as e:
+        return f"API request failed: {e}"
 
 def convert_audio_format(input_data, input_format, output_format='wav'):
     # Convert audio data to WAV format
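Two notes on the helpers added above. recognize_speech_live depends on sr (the SpeechRecognition package) and a recognizer instance that this diff does not add; they are presumably imported and created earlier in the file for the existing /asr route. resample_audio shells out to the ffmpeg binary via subprocess; since pydub is already imported, the same resampling could also be done in-process. A sketch of that alternative (not what this commit does, and assuming pydub's ffmpeg/libav backend is installed):

    from pydub import AudioSegment

    def resample_audio_pydub(input_path, output_path, target_sample_rate=16000):
        # Load the file, change its frame rate, and write it back out as WAV.
        audio = AudioSegment.from_file(input_path)
        audio = audio.set_frame_rate(target_sample_rate)
        audio.export(output_path, format="wav")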
@@ -74,6 +96,52 @@ async def transcribe_audio(audio: UploadFile = File(...)):
     # return {"Text": result}
     return JSONResponse(content={"text": result})
 
+def get_sampling_rate(audio_file_path):
+    audio = AudioSegment.from_file(audio_file_path)
+    return audio.frame_rate
+
+
+
+@app.post("/asr/live")
+async def transcribe_live_audio(audio: UploadFile = File(...)):
+    if not audio:
+        return JSONResponse(content={"success": False}, status_code=400)
+
+    # Check if the uploaded file is in WAV format
+    if audio.content_type != "audio/wav":
+        return JSONResponse(content={"success": False, "message": "Audio must be in WAV format."}, status_code=400)
+
+    try:
+        # Save the received audio to a temporary file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            temp_file_path = temp_file.name
+            shutil.copyfileobj(audio.file, temp_file)
+
+        # Print the file path for debugging
+        print(temp_file_path)
+
+        # Get the sampling rate of the received audio
+        sampling_rate = get_sampling_rate(temp_file_path)
+
+        # Resample the audio to 16 kHz if needed
+        if sampling_rate != 16000:
+            output_path = tempfile.mktemp(suffix=".wav")
+            resample_audio(temp_file_path, output_path, target_sample_rate=16000)
+            result = recognize_speech(output_path)
+            print(result)
+        else:
+            result = recognize_speech(temp_file_path)
+            print(result)
+
+    except Exception as e:
+        print("Error processing audio:", e)
+        return JSONResponse(content={"success": False, "message": "Error processing audio."}, status_code=500)
+    finally:
+        # Cleanup: remove the temporary received audio file
+        if os.path.exists(temp_file_path):
+            os.remove(temp_file_path)
+
+    return JSONResponse(content={"text": result})
 # @app.post("/asr/live")
 # async def transcribe_live_audio():
 #     fs = 16000  # Target sample rate
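In summary, the new /asr/live handler rejects non-WAV uploads, copies the upload to a temporary file, resamples it to 16 kHz through ffmpeg when its sampling rate differs, transcribes it with the existing recognize_speech helper, and finally deletes the temporary input file (the resampled copy, when one is created, is not removed in the finally block). The frontend hard-codes http://localhost:8000, so the API would normally be served on that port; a minimal launcher sketch, assuming uvicorn is the ASGI server in use:

    import uvicorn

    from soundscripter_flaskAPI import app  # "app" is the FastAPI instance used by the @app.post routes

    if __name__ == "__main__":
        # Serve on port 8000 to match the URLs hard-coded in soundscripter.html.
        uvicorn.run(app, host="0.0.0.0", port=8000)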