huynhkimthien committed on
Commit
d86709d
·
verified ·
1 Parent(s): d596d0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -5
app.py CHANGED
@@ -4,6 +4,12 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import torch
5
  import os
6
  import threading
 
 
 
 
 
 
7
 
8
  app = FastAPI()
9
 
@@ -19,6 +25,7 @@ model = AutoModelForCausalLM.from_pretrained(
19
  device_map="auto",
20
  use_auth_token=HF_TOKEN
21
  )
 
22
  @app.get("/")
23
  def read_root():
24
  return {"message": "Ứng dụng đang chạy!"}
@@ -42,7 +49,7 @@ async def chat(request: ChatRequest):
42
  # Sinh phản hồi từ mô hình
43
  outputs = model.generate(
44
  input_ids=input_ids,
45
- max_new_tokens=100,
46
  temperature=0.7,
47
  top_k=50,
48
  top_p=0.9,
@@ -66,13 +73,24 @@ def keep_model_alive():
66
  add_generation_prompt=True,
67
  return_tensors="pt"
68
  ).to(model.device)
 
 
69
 
70
- _ = model.generate(**inputs, max_new_tokens=10)
71
  except Exception as e:
72
- print("Heartbeat error:", e)
73
  time.sleep(300) # Gọi mỗi 5 phút
74
 
 
 
 
75
  # Khởi động tác vụ nền khi app chạy
 
76
  @app.on_event("startup")
77
- def startup_event():
78
- threading.Thread(target=keep_model_alive, daemon=True).start()
 
 
 
 
 
 
4
  import torch
5
  import os
6
  import threading
7
+ import time
8
+ import logging
9
+
10
+ #Cấu hình logging
11
+ logging.basicConfig(level=logging.INFO)
12
+
13
 
14
  app = FastAPI()
15
 
 
25
  device_map="auto",
26
  use_auth_token=HF_TOKEN
27
  )
28
+ logging.info("Mô hình đã sẵn sàng!")
@app.get("/")
def read_root():
    """Root endpoint: return a fixed JSON message saying the app is running."""
    status_payload = {"message": "Ứng dụng đang chạy!"}
    return status_payload
 
49
  # Sinh phản hồi từ mô hình
50
  outputs = model.generate(
51
  input_ids=input_ids,
52
+ max_new_tokens=80,
53
  temperature=0.7,
54
  top_k=50,
55
  top_p=0.9,
 
73
  add_generation_prompt=True,
74
  return_tensors="pt"
75
  ).to(model.device)
76
+ with torch.inference_mode():
77
+ _ = model.generate(inputs, max_new_tokens=5)
78
 
79
+ logging.info("Heartbeat OK")
80
  except Exception as e:
81
+ logging.error(f"Heartbeat error: {e}")
82
  time.sleep(300) # Gọi mỗi 5 phút
83
 
84
+
85
+
86
+
87
  # Khởi động tác vụ nền khi app chạy
88
+
89
  @app.on_event("startup")
90
+ async def startup_event():
91
+ # Dùng background task thay vì thread
92
+ from asyncio import create_task
93
+ create_task(run_keep_alive())
94
+
95
+ async def run_keep_alive():
96
+ keep_model_alive()