r3Vibe committed
Commit 24f6bf4
Parent: 5d431d1
app/main.py CHANGED
@@ -11,9 +11,14 @@ from app.routers import routes
 app = FastAPI(
     title="Mother Tongue Voice Matcher",
     version="0.0.5",
-    servers=[{
-        "url": "http://127.0.0.1:8000/api/v1", "description": "Local Server"
-    }],
+    servers=[
+        {
+            "url": "http://127.0.0.1:8000/api/v1",
+            "description": "Local Server",
+            "url": "https://r3vibe-mother-tongue.hf.space/api/v1",
+            "description": "Huggingface Server",
+        }
+    ],
     root_path="/api/v1",
     root_path_in_servers=False,
 )
@@ -29,6 +34,7 @@ origins = [
     "http://127.0.0.1:8080",
     "http://127.0.0.1:3000",
     "http://127.0.0.1:5173",
+    "https://r3vibe-mother-tongue.hf.space",
 ]
 
 app.add_middleware(
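Note: as committed, both server URLs live in a single dict, so the duplicate "url" and "description" keys mean only the Huggingface entry survives in the generated OpenAPI schema. A minimal sketch of the presumably intended shape, with one dict per server (URLs taken from the diff above):

from fastapi import FastAPI

# Sketch only: one dict per advertised server instead of one dict with duplicate keys.
app = FastAPI(
    title="Mother Tongue Voice Matcher",
    version="0.0.5",
    servers=[
        {"url": "http://127.0.0.1:8000/api/v1", "description": "Local Server"},
        {"url": "https://r3vibe-mother-tongue.hf.space/api/v1", "description": "Huggingface Server"},
    ],
    root_path="/api/v1",
    root_path_in_servers=False,
)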
app/mfcc.py CHANGED
@@ -27,8 +27,7 @@ def calculate_mfcc(audio_data, sample_rate):
 
 
 def calculate_similarity(mfccs1, mfccs2):
-    similarity = cosine_similarity(
-        mfccs1.reshape(1, -1), mfccs2.reshape(1, -1))
+    similarity = cosine_similarity(mfccs1.reshape(1, -1), mfccs2.reshape(1, -1))
     return similarity[0][0]
 
 
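A minimal usage sketch of the one-liner above; the (13, 100) shapes are illustrative stand-ins for two MFCC matrices of equal size, and flattening with reshape(1, -1) requires both inputs to yield vectors of the same length:

import numpy as np

from app.mfcc import calculate_similarity  # function shown in the diff above

# Two dummy MFCC matrices of identical shape (coefficients x frames).
mfccs1 = np.random.rand(13, 100)
mfccs2 = np.random.rand(13, 100)

print(calculate_similarity(mfccs1, mfccs2))  # scalar cosine similarity in [-1, 1]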
 
app/routers/V1/voice/voice_router.py CHANGED
@@ -14,29 +14,47 @@ router = APIRouter(prefix="/voice", tags=["Voice"])
 
 @router.post("/transcribe")
 async def transcribe_audio(
-    file: Annotated[UploadFile, File()], matcher_text: Annotated[str, Body()]
+    original: Annotated[UploadFile, File()],
+    recorded: Annotated[UploadFile, File()],
+    matcher_text: Annotated[str, Body()],
 ):
     try:
         # Validate file type
-        if not file.filename.endswith(".wav"):
+        if not original.filename.endswith(".wav"):
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Invalid file type. Please upload a wav file.",
+            )
+
+        if not recorded.filename.endswith(".wav"):
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
                 detail="Invalid file type. Please upload a wav file.",
             )
 
         # Read file bytes
-        file_bytes = await file.read()
-        filename = f"audio_{int(time.time())}.wav"
+        original_bytes = await original.read()
+        filename_original = f"audio_{int(time.time())}_original.wav"
 
         # Save the file temporarily
-        with open(filename, "wb") as buffer:
-            buffer.write(file_bytes)
+        with open(filename_original, "wb") as buffer:
+            buffer.write(original_bytes)
+
+
+        # Read file bytes
+        recorded_bytes = await recorded.read()
+        filename_recorded = f"audio_{int(time.time())}_recorded.wav"
+
+        # Save the file temporarily
+        with open(filename_recorded, "wb") as buffer:
+            buffer.write(recorded_bytes)
+
 
         try:
-            text = get_transcription(filename)
+            text = get_transcription(filename_recorded)
             percent = match(matcher_text, text)
             if int(percent) > 50:
-                Euclidean, Cosine = mfcc_similarty_check(filename, filename)
+                Euclidean, Cosine = mfcc_similarty_check(filename_original, filename_recorded)
                 return JSONResponse(
                     {
                         "transcription": text,
app/transcriber.py CHANGED
@@ -9,8 +9,7 @@ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 model_id = "openai/whisper-large-v3"
 
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    model_id,
-    torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
 )
 
 model.to(device)
@@ -30,8 +29,7 @@ pipe = pipeline(
     device=device,
 )
 
-dataset = load_dataset(
-    "distil-whisper/librispeech_long", "clean", split="validation")
+dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
 
 sample = dataset[0]["audio"]
 
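For context, a short sketch of how the pipeline and sample above are typically used, assuming app/transcriber.py keeps exposing them as module-level objects:

# Minimal sketch; `pipe` and `sample` are the module-level objects built above.
from app.transcriber import pipe, sample

result = pipe(sample)  # run Whisper large-v3 on the librispeech_long validation sample
print(result["text"])  # transcribed text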