huynhkimthien committed
Commit 40d82fc · verified · 1 Parent(s): d966327

Update app.py

Files changed (1)
  1. app.py +24 -23
app.py CHANGED
@@ -1,35 +1,36 @@
- from transformers import AutoModelForCausalLM, AutoTokenizer
  from fastapi import FastAPI
  app = FastAPI()
  model_name = "Qwen/Qwen3-4B-Instruct-2507"

- # load the tokenizer and the model
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(
      model_name,
-     dtype="auto",
-     device_map="auto"
  )

- # prepare the model input
- prompt = "answer in Vietnamese, concisely."
- messages = [
-     {"role": "user", "content": prompt}
- ]
- text = tokenizer.apply_chat_template(
-     messages,
-     tokenize=False,
-     add_generation_prompt=True,
- )
- model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

- # conduct text completion
- generated_ids = model.generate(
-     **model_inputs,
-     max_new_tokens=200
- )
- output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()

- content = tokenizer.decode(output_ids, skip_special_tokens=True)

- print("content:", content)

  from fastapi import FastAPI
+ from pydantic import BaseModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ import os
+
  app = FastAPI()
  model_name = "Qwen/Qwen3-4B-Instruct-2507"

+ # Load the tokenizer and model (CPU)
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForCausalLM.from_pretrained(
      model_name,
+     device_map={"": "cpu"},  # the free Spaces tier has no GPU
+     torch_dtype=torch.float32
  )

+ class ChatRequest(BaseModel):
+     message: str

+ @app.get("/")
+ def read_root():
+     return {"message": "The app is running!"}
+
+ @app.post("/chat")
+ async def chat(request: ChatRequest):
+     messages = [{"role": "user", "content": request.message}]
+     text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

+     with torch.inference_mode():
+         generated_ids = model.generate(**model_inputs, max_new_tokens=200)

+     output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+     content = tokenizer.decode(output_ids, skip_special_tokens=True)
+     return {"response": content}