Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -168,7 +168,11 @@ async def transcribe_and_match(
|
|
168 |
contents = await file.read()
|
169 |
audio = AudioSegment.from_file(BytesIO(contents))
|
170 |
|
171 |
-
# Step 2: Export to WAV format and load with torchaudio
|
|
|
|
|
|
|
|
|
172 |
wav_buffer = BytesIO()
|
173 |
audio.export(wav_buffer, format="wav")
|
174 |
wav_buffer.seek(0)
|
@@ -179,14 +183,14 @@ async def transcribe_and_match(
|
|
179 |
# Convert waveform to float32
|
180 |
samples = waveform.numpy().astype(np.float32)
|
181 |
|
182 |
-
# Step 3: Use the speech-to-text model
|
183 |
transcription_result = nlp_speech_to_text(samples)
|
184 |
transcription_text = transcription_result['text']
|
185 |
|
186 |
-
# Step 4: Parse the field_data (which contains field names/IDs)
|
187 |
fields = json.loads(field_data)
|
188 |
|
189 |
-
# Step 5: Find the matching field for the transcription
|
190 |
field_matches = {}
|
191 |
for field in fields:
|
192 |
field_label = field.get("field_label", "").lower()
|
@@ -196,7 +200,7 @@ async def transcribe_and_match(
|
|
196 |
if field_label in transcription_text.lower():
|
197 |
field_matches[field_id] = transcription_text
|
198 |
|
199 |
-
# Step 6: Return transcription + matched fields
|
200 |
return {
|
201 |
"transcription": transcription_text,
|
202 |
"matched_fields": field_matches
|
|
|
168 |
contents = await file.read()
|
169 |
audio = AudioSegment.from_file(BytesIO(contents))
|
170 |
|
171 |
+
# Step 2: Ensure audio is mono
|
172 |
+
if audio.channels > 1:
|
173 |
+
audio = audio.set_channels(1) # Convert to mono
|
174 |
+
|
175 |
+
# Step 3: Export to WAV format and load with torchaudio
|
176 |
wav_buffer = BytesIO()
|
177 |
audio.export(wav_buffer, format="wav")
|
178 |
wav_buffer.seek(0)
|
|
|
183 |
# Convert waveform to float32
|
184 |
samples = waveform.numpy().astype(np.float32)
|
185 |
|
186 |
+
# Step 4: Use the speech-to-text model
|
187 |
transcription_result = nlp_speech_to_text(samples)
|
188 |
transcription_text = transcription_result['text']
|
189 |
|
190 |
+
# Step 5: Parse the field_data (which contains field names/IDs)
|
191 |
fields = json.loads(field_data)
|
192 |
|
193 |
+
# Step 6: Find the matching field for the transcription
|
194 |
field_matches = {}
|
195 |
for field in fields:
|
196 |
field_label = field.get("field_label", "").lower()
|
|
|
200 |
if field_label in transcription_text.lower():
|
201 |
field_matches[field_id] = transcription_text
|
202 |
|
203 |
+
# Step 7: Return transcription + matched fields
|
204 |
return {
|
205 |
"transcription": transcription_text,
|
206 |
"matched_fields": field_matches
|