Spaces:

MJobe
/

document-vqa-v2

Running

App Files Files Community

MJobe committed on Oct 22

Commit

70678a5

•

1 Parent(s): 0945284

Update main.py

Browse files

Files changed (1) hide show

main.py +13 -10

main.py CHANGED Viewed

@@ -7,6 +7,7 @@ from io import BytesIO
 from starlette.middleware import Middleware
 from starlette.middleware.cors import CORSMiddleware
 from pdf2image import convert_from_bytes
 app = FastAPI()
@@ -154,28 +155,29 @@ async def test_classify_text(text: str = Form(...)):
     except Exception as e:
         return JSONResponse(content=f"Error classifying text: {str(e)}", status_code=500)
 @app.post("/transcribe_and_match/", description="Transcribe audio and match responses to form fields.")
 async def transcribe_and_match(
     file: UploadFile = File(...),
     field_data: str = Form(...)
 ):
-    """
-    Transcribe audio and match it to form fields.
-    :param file: The uploaded audio file.
-    :param field_data: A JSON string that contains form field information (field names and IDs).
-    """
     try:
-        # Step 1: Read and transcribe the audio file
         contents = await file.read()
-        transcription_result = nlp_speech_to_text(contents)
         transcription_text = transcription_result['text']
         # Step 2: Parse the field_data (which contains field names/IDs)
-        # Example: [{"field_id": "name_field", "field_label": "Name"}, {"field_id": "email_field", "field_label": "Email"}]
         import json
         fields = json.loads(field_data)
         # Step 3: Find the matching field for the transcription
         field_matches = {}
@@ -196,6 +198,7 @@ async def transcribe_and_match(
     except Exception as e:
         return JSONResponse(content=f"Error processing audio or matching fields: {str(e)}", status_code=500)
 # Set up CORS middleware
 origins = ["*"]  # or specify your list of allowed origins
 app.add_middleware(

 from starlette.middleware import Middleware
 from starlette.middleware.cors import CORSMiddleware
 from pdf2image import convert_from_bytes
+from pydub import AudioSegment
 app = FastAPI()
     except Exception as e:
         return JSONResponse(content=f"Error classifying text: {str(e)}", status_code=500)
 @app.post("/transcribe_and_match/", description="Transcribe audio and match responses to form fields.")
 async def transcribe_and_match(
     file: UploadFile = File(...),
     field_data: str = Form(...)
 ):
     try:
+        # Step 1: Read and convert the audio file
         contents = await file.read()
+        audio = AudioSegment.from_file(BytesIO(contents))
+        # Optionally convert to wav if needed
+        wav_io = BytesIO()
+        audio.export(wav_io, format="wav")
+        wav_io.seek(0)
+        # Transcribe the WAV audio file
+        transcription_result = nlp_speech_to_text(wav_io)
         transcription_text = transcription_result['text']
         # Step 2: Parse the field_data (which contains field names/IDs)
         import json
         fields = json.loads(field_data)
         # Step 3: Find the matching field for the transcription
         field_matches = {}
     except Exception as e:
         return JSONResponse(content=f"Error processing audio or matching fields: {str(e)}", status_code=500)
 # Set up CORS middleware
 origins = ["*"]  # or specify your list of allowed origins
 app.add_middleware(