huynhkimthien committed on
Commit
78895c8
·
verified ·
1 Parent(s): 6dd9fa3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -18
app.py CHANGED
@@ -1,28 +1,15 @@
1
- from transformers import AutoModelForCausalLM, AutoTokenizer,BitsAndBytesConfig
2
- import os
3
- from fastapi import FastAPI
4
- import torch
5
-
6
- app = FastAPI()
7
 
8
  model_name = "Qwen/Qwen3-4B-Instruct-2507"
9
- HF_TOKEN = os.environ["HF_TOKEN"] # Token được cấu hình trong Hugging Face Secrets
10
-
11
- bnb_config = BitsAndBytesConfig(
12
- load_in_4bit=True,
13
- bnb_4bit_compute_dtype=torch.float16,
14
- bnb_4bit_use_double_quant=True,
15
- bnb_4bit_quant_type="nf4"
16
- )
17
 
18
  # load the tokenizer and the model
19
- tokenizer = AutoTokenizer.from_pretrained(model_name,token=HF_TOKEN)
20
  model = AutoModelForCausalLM.from_pretrained(
21
  model_name,
22
- quantization_config=bnb_config,
23
  device_map="auto"
24
  )
25
- logging.info("Mô hình đã sẵn sàng!")
26
  # prepare the model input
27
  prompt = "trả lời bằng tiếng việt, ngắn gọn."
28
  messages = [
@@ -38,7 +25,7 @@ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
38
  # conduct text completion
39
  generated_ids = model.generate(
40
  **model_inputs,
41
- max_new_tokens=512
42
  )
43
  output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
44
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 
 
 
2
 
3
  model_name = "Qwen/Qwen3-4B-Instruct-2507"
 
 
 
 
 
 
 
 
4
 
5
  # load the tokenizer and the model
6
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
7
  model = AutoModelForCausalLM.from_pretrained(
8
  model_name,
9
+ torch_dtype="auto",
10
  device_map="auto"
11
  )
12
+
13
  # prepare the model input
14
  prompt = "trả lời bằng tiếng việt, ngắn gọn."
15
  messages = [
 
25
  # conduct text completion
26
  generated_ids = model.generate(
27
  **model_inputs,
28
+ max_new_tokens=200
29
  )
30
  output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
31