Spaces:

MJobe
/

document-vqa-v2

Running

App Files Files Community

MJobe committed on Oct 25

Commit

22c526e

•

1 Parent(s): fbe5c6d

Update main.py

Browse files

Files changed (1) hide show

main.py +32 -0

main.py CHANGED Viewed

@@ -222,6 +222,38 @@ async def transcribe_and_answer(
         logging.error(f"General error: {e}")
         raise HTTPException(status_code=500, detail="Internal Server Error")
 # Set up CORS middleware
 origins = ["*"]  # or specify your list of allowed origins
 app.add_middleware(

         logging.error(f"General error: {e}")
         raise HTTPException(status_code=500, detail="Internal Server Error")
+@app.post("/test-transcription/", description="Upload an audio file to test transcription using speech_recognition.")
+async def test_transcription(file: UploadFile = File(...)):
+    try:
+        # Check if the file format is supported
+        if file.content_type not in ["audio/wav", "audio/mpeg", "audio/mp3"]:
+            raise HTTPException(status_code=400, detail="Unsupported audio format. Please upload a WAV or MP3 file.")
+        # Convert uploaded file to WAV if necessary for compatibility with SpeechRecognition
+        audio_data = await file.read()
+        audio_file = io.BytesIO(audio_data)
+        if file.content_type in ["audio/mpeg", "audio/mp3"]:
+            # Convert MP3 to WAV
+            audio = AudioSegment.from_file(audio_file, format="mp3")
+            audio_wav = io.BytesIO()
+            audio.export(audio_wav, format="wav")
+            audio_wav.seek(0)
+        else:
+            audio_wav = audio_file
+        # Transcribe audio using speech_recognition
+        recognizer = sr.Recognizer()
+        with sr.AudioFile(audio_wav) as source:
+            audio = recognizer.record(source)
+            transcription = recognizer.recognize_google(audio)
+        # Return the transcription
+        return {"transcription": transcription}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error during transcription: {str(e)}")
 # Set up CORS middleware
 origins = ["*"]  # or specify your list of allowed origins
 app.add_middleware(