Zai

Running

App Files Files Community

huynhkimthien committed 16 days ago

Commit

e1784e4

verified ·

1 Parent(s): 00c4c8a

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -4

app.py CHANGED Viewed

@@ -74,7 +74,7 @@ async def chat(request: ChatRequest):
     return {"response": response_text}
 # Endpoint voice chat + TTS
-@app.post("/voice_chat")
 async def voice_chat(file: UploadFile = File(...)):
     file_location = f"temp_{file.filename}"
     with open(file_location, "wb") as f:
@@ -82,9 +82,29 @@ async def voice_chat(file: UploadFile = File(...)):
     result = whisper_model.transcribe(file_location, language="vi")
     user_text = result["text"]
-    os.remove(file_location)
-    # Kiểm tra yêu cầu mở nhạc
     if any(kw in user_text.lower() for kw in ["nghe nhạc", "mở bài hát", "bài hát", "bài"]):
         song_name = extract_song_name(user_text)
         if song_name:

     return {"response": response_text}
 # Endpoint voice chat + TTS
+"""@app.post("/voice_chat")
 async def voice_chat(file: UploadFile = File(...)):
     file_location = f"temp_{file.filename}"
     with open(file_location, "wb") as f:
     result = whisper_model.transcribe(file_location, language="vi")
     user_text = result["text"]
+    os.remove(file_location)"""
+import io
+import numpy as np
+import scipy.io.wavfile as wav
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse app = FastAPI()
+@app.post("/voice_chat") async def voice_chat(request: Request):
+    # Đọc dữ liệu âm thanh thô từ ESP32 gửi lên
+    raw_audio = await request.body()
+    # Giả sử âm thanh là PCM 16-bit mono, sample rate 16000 Hz
+    sample_rate = 16000
+    audio_np = np.frombuffer(raw_audio, dtype=np.int16)
+    # Chuyển thành file WAV trong bộ nhớ
+    wav_io = io.BytesIO()
+    wav.write(wav_io, sample_rate, audio_np) wav_io.seek(0)
+    # Lưu file WAV tạm để dùng với Whisper
+    with open("temp_audio.wav", "wb") as f: f.write(wav_io.read())
+    # Gọi Whisper để chuyển âm thanh thành văn bản
+    import whisper model = whisper.load_model("base")
+    result = model.transcribe("temp_audio.wav", language="vi")
+    user_text = result["text"]
+    return JSONResponse(content={"text": user_text})
+# Kiểm tra yêu cầu mở nhạc
     if any(kw in user_text.lower() for kw in ["nghe nhạc", "mở bài hát", "bài hát", "bài"]):
         song_name = extract_song_name(user_text)
         if song_name: