Update funcs.py
funcs.py CHANGED
@@ -101,10 +101,14 @@ import torch
 
 # Load model once globally for reuse
 tokenizer = AutoTokenizer.from_pretrained("tiiuae/Falcon-H1-1.5B-Deep-Instruct")
-
-
+use_cuda = torch.cuda.is_available()
+model = AutoModelForCausalLM.from_pretrained(
+    "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
+    torch_dtype=torch.float16 if use_cuda else torch.float32,
+    device_map="auto" if use_cuda else None
+)
 def generate_falcon_response(prompt, max_new_tokens=300):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1800).to(model.device)
     outputs = model.generate(
         **inputs,
         max_new_tokens=max_new_tokens,
@@ -115,6 +119,7 @@ def generate_falcon_response(prompt, max_new_tokens=300):
     decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return decoded_output[len(prompt):].strip()
 
+
 def summarize_and_recommend(therapy_session_conversation):
 
     session_time = str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
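
For reference, a consolidated sketch of how the patched region of funcs.py should read after this commit. The import block is an assumption (only import torch is visible in the hunk context), and the generation kwargs elided between the two hunks (old lines 111-114) are left as a comment rather than guessed:

from datetime import datetime  # assumed: used by summarize_and_recommend below

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model once globally for reuse
tokenizer = AutoTokenizer.from_pretrained("tiiuae/Falcon-H1-1.5B-Deep-Instruct")
use_cuda = torch.cuda.is_available()
model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/Falcon-H1-1.5B-Deep-Instruct",
    torch_dtype=torch.float16 if use_cuda else torch.float32,  # fp16 only when a GPU is available
    device_map="auto" if use_cuda else None,  # "auto" needs accelerate; None keeps the model on CPU
)

def generate_falcon_response(prompt, max_new_tokens=300):
    # Truncate long prompts so the tokenized input stays within the context budget
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1800).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        # further generation kwargs elided between the hunks (old lines 111-114)
    )
    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Drop the echoed prompt so only the newly generated text is returned
    return decoded_output[len(prompt):].strip()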
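
A minimal usage sketch, assuming funcs.py is importable as a module; the prompt string here is purely illustrative:

from funcs import generate_falcon_response

# Hypothetical prompt, not taken from the repo
reply = generate_falcon_response(
    "Summarize the main concerns the client raised in this session.",
    max_new_tokens=120,
)
print(reply)

One caveat on the design: decoded_output[len(prompt):] assumes the decoded text begins with the prompt verbatim, which can drift once truncation fires or when the tokenizer's decode round-trip is not exact. Slicing the generated token ids instead, e.g. outputs[0][inputs["input_ids"].shape[1]:], is a more robust variant.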