Commit 13e81e3 · Aditi Tewari committed
1 Parent(s): 096eb43

Update backend and frontend

Files changed:
- soundscripter.html +41 -4
- soundscripter_flaskAPI.py +68 -0
soundscripter.html
CHANGED
@@ -106,16 +106,33 @@
 
       if (recordedAudioBlob) {
         formData.append("audio", recordedAudioBlob, "recorded_audio.wav");
+
+        fetch("http://localhost:8000/asr/live", {
+          method: "POST",
+          body: formData,
+        })
+          .then((response) => response.json())
+          .then((data) => {
+            if (data.text) {
+              outputText.value = data.text;
+            } else {
+              outputText.value = "No text recognized.";
+            }
+          })
+          .catch((error) => {
+            console.error("Error while connecting with backend", error);
+            outputText.value = "Backend communication failed.";
+          });
+
+
       } else if (uploadedFile) {
         formData.append("audio", uploadedFile, uploadedFile.name);
-      }
 
-
-      fetch("/asr", {
+        fetch("http://localhost:8000/asr", {
           method: "POST",
           body: formData,
         })
-
+          .then((response) => response.json())
           .then((data) => {
             if (data.text) {
               outputText.value = data.text;
@@ -128,6 +145,26 @@
             outputText.value = "Backend communication failed.";
           });
 
+      }
+
+
+      // fetch("http://localhost:8000/asr", {
+      //   method: "POST",
+      //   body: formData,
+      // })
+      //   .then((response) => response.json())
+      //   .then((data) => {
+      //     if (data.text) {
+      //       outputText.value = data.text;
+      //     } else {
+      //       outputText.value = "No text recognized.";
+      //     }
+      //   })
+      //   .catch((error) => {
+      //     console.error("Error while connecting with backend", error);
+      //     outputText.value = "Backend communication failed.";
+      //   });
+
       if (recordedAudioBlob) {
         const downloadLink = document.createElement("a");
         downloadLink.href = URL.createObjectURL(recordedAudioBlob);
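The new frontend code posts the recorded blob (or the uploaded file) as a multipart form field named "audio" to the FastAPI backend at http://localhost:8000. The same request can be reproduced outside the browser for testing; a minimal sketch using Python's requests package (not part of this repo) against a locally running server, with an illustrative WAV file name:

    import requests

    # Mirror the browser's FormData: field name "audio", WAV content type.
    # "recorded_audio.wav" is an assumed local test file, not something the repo ships.
    with open("recorded_audio.wav", "rb") as f:
        files = {"audio": ("recorded_audio.wav", f, "audio/wav")}
        response = requests.post("http://localhost:8000/asr/live", files=files)

    # The backend answers {"text": "..."} on success, or {"success": false, ...}
    # with a 4xx/5xx status code on failure.
    print(response.status_code, response.json())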
soundscripter_flaskAPI.py
CHANGED
@@ -9,6 +9,9 @@ from fastapi.responses import FileResponse, JSONResponse
 from fastapi.responses import HTMLResponse
 from pydub import AudioSegment
 import librosa
+import tempfile
+import shutil
+import subprocess
 from fastapi.middleware.cors import CORSMiddleware
 
 
@@ -33,6 +36,25 @@ async def read_root():
     # return HTMLResponse(content=html_content)
     return html_content
 
+def resample_audio(input_path, output_path, target_sample_rate):
+    ffmpeg_cmd = [
+        "ffmpeg",
+        "-i", input_path,
+        "-ar", str(target_sample_rate),
+        output_path
+    ]
+    subprocess.run(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+def recognize_speech_live(file_path, language="hi-IN"):
+    with sr.AudioFile(file_path) as source:
+        audio = recognizer.record(source)
+    try:
+        text = recognizer.recognize_google(audio, language=language)
+        return text
+    except sr.UnknownValueError:
+        return "Speech not recognized."
+    except sr.RequestError as e:
+        return f"API request failed: {e}"
 
 def convert_audio_format(input_data, input_format, output_format='wav'):
     # Convert audio data to WAV format
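Two notes on the helpers added above. recognize_speech_live depends on sr (the SpeechRecognition package) and a recognizer instance that this diff does not add; they are presumably imported and created earlier in the file for the existing /asr route. resample_audio shells out to the ffmpeg binary via subprocess; since pydub is already imported, the same resampling could also be done in-process. A sketch of that alternative (not what this commit does, and assuming pydub's ffmpeg/libav backend is installed):

    from pydub import AudioSegment

    def resample_audio_pydub(input_path, output_path, target_sample_rate=16000):
        # Load the file, change its frame rate, and write it back out as WAV.
        audio = AudioSegment.from_file(input_path)
        audio = audio.set_frame_rate(target_sample_rate)
        audio.export(output_path, format="wav")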
@@ -74,6 +96,52 @@ async def transcribe_audio(audio: UploadFile = File(...)):
     # return {"Text": result}
     return JSONResponse(content={"text": result})
 
+def get_sampling_rate(audio_file_path):
+    audio = AudioSegment.from_file(audio_file_path)
+    return audio.frame_rate
+
+
+
+@app.post("/asr/live")
+async def transcribe_live_audio(audio: UploadFile = File(...)):
+    if not audio:
+        return JSONResponse(content={"success": False}, status_code=400)
+
+    # Check if the uploaded file is in WAV format
+    if audio.content_type != "audio/wav":
+        return JSONResponse(content={"success": False, "message": "Audio must be in WAV format."}, status_code=400)
+
+    try:
+        # Save the received audio to a temporary file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+            temp_file_path = temp_file.name
+            shutil.copyfileobj(audio.file, temp_file)
+
+        # Print the file path for debugging
+        print(temp_file_path)
+
+        # Get the sampling rate of the received audio
+        sampling_rate = get_sampling_rate(temp_file_path)
+
+        # Resample the audio to 16 kHz if needed
+        if sampling_rate != 16000:
+            output_path = tempfile.mktemp(suffix=".wav")
+            resample_audio(temp_file_path, output_path, target_sample_rate=16000)
+            result = recognize_speech(output_path)
+            print(result)
+        else:
+            result = recognize_speech(temp_file_path)
+            print(result)
+
+    except Exception as e:
+        print("Error processing audio:", e)
+        return JSONResponse(content={"success": False, "message": "Error processing audio."}, status_code=500)
+    finally:
+        # Cleanup: remove the temporary received audio file
+        if os.path.exists(temp_file_path):
+            os.remove(temp_file_path)
+
+    return JSONResponse(content={"text": result})
 # @app.post("/asr/live")
 # async def transcribe_live_audio():
 #     fs = 16000  # Target sample rate
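In summary, the new /asr/live handler rejects non-WAV uploads, copies the upload to a temporary file, resamples it to 16 kHz through ffmpeg when its sampling rate differs, transcribes it with the existing recognize_speech helper, and finally deletes the temporary input file (the resampled copy, when one is created, is not removed in the finally block). The frontend hard-codes http://localhost:8000, so the API would normally be served on that port; a minimal launcher sketch, assuming uvicorn is the ASGI server in use:

    import uvicorn

    from soundscripter_flaskAPI import app  # "app" is the FastAPI instance used by the @app.post routes

    if __name__ == "__main__":
        # Serve on port 8000 to match the URLs hard-coded in soundscripter.html.
        uvicorn.run(app, host="0.0.0.0", port=8000)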