huynhkimthien committed (verified)
Commit da03d90 · Parent(s): 3c282a5

Update app.py

Files changed (1)
  1. app.py +11 -49
app.py CHANGED
@@ -1,29 +1,21 @@
- from fastapi import FastAPI, File, UploadFile
- from fastapi.responses import FileResponse
+ from fastapi import FastAPI
  from pydantic import BaseModel
  from transformers import AutoModelForCausalLM, AutoTokenizer
- import whisper
  import torch
- from gtts import gTTS
  import os
- hf_token = os.getenv("HF_TOKEN")
- app = FastAPI()
 
- # Load Qwen model
+ app = FastAPI()
  model_name = "Qwen/Qwen3-4B-Instruct-2507"
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
+
+ # Load tokenizer and model (CPU for the free Spaces tier)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(
      model_name,
-     use_auth_token=hf_token,
      device_map={"": "cpu"},
      torch_dtype=torch.float32
  )
 
- # Load Whisper model
- whisper_model = whisper.load_model("base")
-
- # Store the conversation history
- conversation = [{"role": "system", "content": "Bạn là một trợ lý AI. Hãy trả lời ngắn gọn, súc tích, tối đa 2 câu."}]
+ conversation = [{"role": "system", "content": "Bạn là một trợ lý AI. Hãy trả lời ngắn gọn, súc tích, tối đa 2 câu."}]  # store the conversation history
 
  class ChatRequest(BaseModel):
      message: str
@@ -32,55 +24,25 @@ class ChatRequest(BaseModel):
  def read_root():
      return {"message": "Ứng dụng đang chạy!"}
 
- # Text chat endpoint
  @app.post("/chat")
  async def chat(request: ChatRequest):
      conversation.append({"role": "user", "content": request.message})
-     text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
-     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-     response_text = generate_full_response(model_inputs)
-     conversation.append({"role": "assistant", "content": response_text})
-     return {"response": response_text}
-
- # Voice chat + TTS endpoint
- @app.post("/voice_chat")
- async def voice_chat(file: UploadFile = File(...)):
-     # Save the upload to a temporary file
-     file_location = f"temp_{file.filename}"
-     with open(file_location, "wb") as f:
-         f.write(await file.read())
 
-     # Convert the audio to text
-     result = whisper_model.transcribe(file_location, language="vi")
-     user_text = result["text"]
-
-     # Ask the Qwen model for a reply
-     conversation.append({"role": "user", "content": user_text})
+     # Apply the chat template
      text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
      model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
      response_text = generate_full_response(model_inputs)
      conversation.append({"role": "assistant", "content": response_text})
 
-     # Build an audio file from the response
-     tts = gTTS(response_text, lang="vi")
-     audio_file = "response.mp3"
-     tts.save(audio_file)
-
-     return {
-         "user_text": user_text,
-         "response": response_text,
-         "audio_url": f"/get_audio"
-     }
+     return {"response": response_text}
 
- # Endpoint that returns the audio file
- @app.get("/get_audio")
- async def get_audio():
-     return FileResponse("response.mp3", media_type="audio/mpeg")
 
- # Response generation helper
  def generate_full_response(model_inputs, max_new_tokens=64):
      with torch.inference_mode():
          generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_tokens)
          output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
          response_text = tokenizer.decode(output_ids, skip_special_tokens=True)
+
+
      return response_text.strip()
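
For reference, a minimal client sketch for the one endpoint that survives this change. The base URL is an assumption (point it at wherever uvicorn serves this Space); the /chat route and the request/response JSON shapes come from app.py above.

import requests

BASE_URL = "http://localhost:7860"  # assumption: replace with the actual Space URL

# The body must match the ChatRequest model: {"message": "..."}
resp = requests.post(f"{BASE_URL}/chat", json={"message": "Xin chào!"})
resp.raise_for_status()

# The endpoint returns {"response": "..."}; replies are capped at 64 new tokens
# by generate_full_response, so they stay short.
print(resp.json()["response"])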