Aditi Tewari committed on
Commit
13e81e3
·
1 Parent(s): 096eb43

Update backend and frontend

Browse files
Files changed (2) hide show
  1. soundscripter.html +41 -4
  2. soundscripter_flaskAPI.py +68 -0
soundscripter.html CHANGED
@@ -106,16 +106,33 @@
106
 
107
  if (recordedAudioBlob) {
108
  formData.append("audio", recordedAudioBlob, "recorded_audio.wav");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  } else if (uploadedFile) {
110
  formData.append("audio", uploadedFile, uploadedFile.name);
111
- }
112
 
113
-
114
- fetch("/asr", {
115
  method: "POST",
116
  body: formData,
117
  })
118
- .then((response) => response.json())
119
  .then((data) => {
120
  if (data.text) {
121
  outputText.value = data.text;
@@ -128,6 +145,26 @@
128
  outputText.value = "Backend communication failed.";
129
  });
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  if (recordedAudioBlob) {
132
  const downloadLink = document.createElement("a");
133
  downloadLink.href = URL.createObjectURL(recordedAudioBlob);
 
106
 
107
  if (recordedAudioBlob) {
108
  formData.append("audio", recordedAudioBlob, "recorded_audio.wav");
109
+
110
+ fetch("http://localhost:8000/asr/live", {
111
+ method: "POST",
112
+ body: formData,
113
+ })
114
+ .then((response) => response.json())
115
+ .then((data) => {
116
+ if (data.text) {
117
+ outputText.value = data.text;
118
+ } else {
119
+ outputText.value = "No text recognized.";
120
+ }
121
+ })
122
+ .catch((error) => {
123
+ console.error("Error while connecting with backend", error);
124
+ outputText.value = "Backend communication failed.";
125
+ });
126
+
127
+
128
  } else if (uploadedFile) {
129
  formData.append("audio", uploadedFile, uploadedFile.name);
 
130
 
131
+ fetch("http://localhost:8000/asr", {
 
132
  method: "POST",
133
  body: formData,
134
  })
135
+ .then((response) => response.json())
136
  .then((data) => {
137
  if (data.text) {
138
  outputText.value = data.text;
 
145
  outputText.value = "Backend communication failed.";
146
  });
147
 
148
+ }
149
+
150
+
151
+ // fetch("http://localhost:8000/asr", {
152
+ // method: "POST",
153
+ // body: formData,
154
+ // })
155
+ // .then((response) => response.json())
156
+ // .then((data) => {
157
+ // if (data.text) {
158
+ // outputText.value = data.text;
159
+ // } else {
160
+ // outputText.value = "No text recognized.";
161
+ // }
162
+ // })
163
+ // .catch((error) => {
164
+ // console.error("Error while connecting with backend", error);
165
+ // outputText.value = "Backend communication failed.";
166
+ // });
167
+
168
  if (recordedAudioBlob) {
169
  const downloadLink = document.createElement("a");
170
  downloadLink.href = URL.createObjectURL(recordedAudioBlob);
soundscripter_flaskAPI.py CHANGED
@@ -9,6 +9,9 @@ from fastapi.responses import FileResponse, JSONResponse
9
  from fastapi.responses import HTMLResponse
10
  from pydub import AudioSegment
11
  import librosa
 
 
 
12
  from fastapi.middleware.cors import CORSMiddleware
13
 
14
 
@@ -33,6 +36,25 @@ async def read_root():
33
  # return HTMLResponse(content=html_content)
34
  return html_content
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  def convert_audio_format(input_data, input_format, output_format='wav'):
38
  # Convert audio data to WAV format
@@ -74,6 +96,52 @@ async def transcribe_audio(audio: UploadFile = File(...)):
74
  # return {"Text": result}
75
  return JSONResponse(content={"text": result})
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  # @app.post("/asr/live")
78
  # async def transcribe_live_audio():
79
  # fs = 16000 # Target sample rate
 
9
  from fastapi.responses import HTMLResponse
10
  from pydub import AudioSegment
11
  import librosa
12
+ import tempfile
13
+ import shutil
14
+ import subprocess
15
  from fastapi.middleware.cors import CORSMiddleware
16
 
17
 
 
36
  # return HTMLResponse(content=html_content)
37
  return html_content
38
 
39
def resample_audio(input_path, output_path, target_sample_rate):
    """Resample an audio file with ffmpeg and write the result to *output_path*.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the resampled audio is written to. An existing
            file at this path is overwritten.
        target_sample_rate: Desired sample rate in Hz (passed to ``-ar``).

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status; its captured stderr is available on the exception.
    """
    ffmpeg_cmd = [
        "ffmpeg",
        # Never read from stdin: a server process has no interactive terminal,
        # and ffmpeg would otherwise try to consume it.
        "-nostdin",
        # Overwrite the destination instead of prompting for confirmation.
        "-y",
        "-i", input_path,
        "-ar", str(target_sample_rate),
        output_path,
    ]
    # check=True turns a failed conversion into an exception instead of
    # silently leaving the caller with a missing or truncated output file.
    subprocess.run(
        ffmpeg_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )
47
+
48
def recognize_speech_live(file_path, language="hi-IN"):
    """Transcribe the audio file at *file_path* and return the text.

    The file is read through ``sr.AudioFile`` and passed to
    ``recognizer.recognize_google`` with the given *language* code.
    Recognition problems are reported as plain strings, not raised:
    ``"Speech not recognized."`` for ``sr.UnknownValueError`` and
    ``"API request failed: ..."`` for ``sr.RequestError``.
    """
    # `sr` and `recognizer` are module-level names defined elsewhere in the file.
    with sr.AudioFile(file_path) as wav_source:
        captured = recognizer.record(wav_source)

    try:
        transcript = recognizer.recognize_google(captured, language=language)
    except sr.UnknownValueError:
        transcript = "Speech not recognized."
    except sr.RequestError as request_error:
        transcript = f"API request failed: {request_error}"
    return transcript
58
 
59
  def convert_audio_format(input_data, input_format, output_format='wav'):
60
  # Convert audio data to WAV format
 
96
  # return {"Text": result}
97
  return JSONResponse(content={"text": result})
98
 
99
def get_sampling_rate(audio_file_path):
    """Return the ``frame_rate`` pydub reports for the file at *audio_file_path*."""
    return AudioSegment.from_file(audio_file_path).frame_rate
102
+
103
+
104
+
105
@app.post("/asr/live")
async def transcribe_live_audio(audio: UploadFile = File(...)):
    """Transcribe an uploaded WAV recording and return the recognized text.

    The upload is saved into a private temporary directory, resampled to
    16 kHz when its sample rate differs, and passed to ``recognize_speech``.

    Args:
        audio: The uploaded audio file (multipart form field ``audio``).

    Returns:
        JSONResponse: ``{"text": <result>}`` on success;
        status 400 with ``{"success": False, ...}`` when no file was sent or
        the content type is not WAV; status 500 with
        ``{"success": False, "message": "Error processing audio."}`` when
        any step of the processing raises.
    """
    if not audio:
        return JSONResponse(content={"success": False}, status_code=400)

    # Check if the uploaded file is in WAV format. Clients label WAV data
    # with several equivalent MIME types, so accept the common aliases too.
    wav_content_types = ("audio/wav", "audio/x-wav", "audio/wave", "audio/vnd.wave")
    if audio.content_type not in wav_content_types:
        return JSONResponse(content={"success": False, "message": "Audio must be in WAV format."}, status_code=400)

    # NOTE(review): the helpers called below appear to be synchronous
    # (resample_audio shells out to ffmpeg), so this coroutine blocks the
    # event loop while a request is processed; confirm whether that matters.
    try:
        # Both the received and the resampled file live in one private
        # directory that is removed on exit, so nothing is leaked even when
        # a step fails, and no predictable temp-file name is ever used.
        with tempfile.TemporaryDirectory() as work_dir:
            # Save the received audio to a temporary file
            received_path = os.path.join(work_dir, "received.wav")
            with open(received_path, "wb") as received_file:
                shutil.copyfileobj(audio.file, received_file)

            # Print the file path for debugging
            print(received_path)

            # Resample the audio to 16 kHz if needed
            if get_sampling_rate(received_path) != 16000:
                # The target does not exist yet, so ffmpeg never has to ask
                # whether it may overwrite it.
                resampled_path = os.path.join(work_dir, "resampled.wav")
                resample_audio(received_path, resampled_path, target_sample_rate=16000)
                result = recognize_speech(resampled_path)
            else:
                result = recognize_speech(received_path)
            print(result)

    except Exception as e:
        # Top-level request boundary: report the failure to the client
        # instead of letting the exception escape the handler.
        print("Error processing audio:", e)
        return JSONResponse(content={"success": False, "message": "Error processing audio."}, status_code=500)

    return JSONResponse(content={"text": result})
145
  # @app.post("/asr/live")
146
  # async def transcribe_live_audio():
147
  # fs = 16000 # Target sample rate