Zai

Running

huynhkimthien committed on 19 days ago

Commit

78895c8

verified ·

1 Parent(s): 6dd9fa3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,28 +1,15 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer,BitsAndBytesConfig
-import os
-from fastapi import FastAPI
-import torch
-app = FastAPI()
 model_name = "Qwen/Qwen3-4B-Instruct-2507"
-HF_TOKEN = os.environ["HF_TOKEN"]  # Token được cấu hình trong Hugging Face Secrets
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4"
-)
 # load the tokenizer and the model
-tokenizer = AutoTokenizer.from_pretrained(model_name,token=HF_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    quantization_config=bnb_config,
     device_map="auto"
 )
-logging.info("Mô hình đã sẵn sàng!")
 # prepare the model input
 prompt = "trả lời bằng tiếng việt, ngắn gọn."
 messages = [
@@ -38,7 +25,7 @@ model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 # conduct text completion
 generated_ids = model.generate(
     **model_inputs,
-    max_new_tokens=512
 )
 output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()

+from transformers import AutoModelForCausalLM, AutoTokenizer
 model_name = "Qwen/Qwen3-4B-Instruct-2507"
 # load the tokenizer and the model
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
+    torch_dtype="auto",
     device_map="auto"
 )
 # prepare the model input
 prompt = "trả lời bằng tiếng việt, ngắn gọn."
 messages = [
 # conduct text completion
 generated_ids = model.generate(
     **model_inputs,
+    max_new_tokens=200
 )
 output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()