swapniild1601 committed
Commit f0f57c7 · verified · 1 Parent(s): 93de4fc

Create hf_model_adapter.py

Files changed (1)
  1. hf_model_adapter.py +36 -0
hf_model_adapter.py ADDED
@@ -0,0 +1,36 @@
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


class HFLocalModelAdapter:
    """
    Minimal Hugging Face model adapter for local text generation.
    """

    def __init__(self, model_name="stabilityai/stablelm-3b-4e1t", device=None):
        self.model_name = model_name
        # Prefer CUDA when available, otherwise fall back to CPU.
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        use_cuda = self.device.startswith("cuda")
        print(f"Loading {model_name} on {self.device} ...")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            low_cpu_mem_usage=True,
            device_map="auto" if use_cuda else None,
        )
        # device_map="auto" already dispatches the weights across devices;
        # calling .to() on a dispatched model raises an error, so only move
        # the model manually in the CPU case.
        if not use_cuda:
            self.model.to(self.device)
        self.model.eval()
        print("Model loaded.")

    def generate(self, prompt, max_new_tokens=250, temperature=0.7, top_p=0.95):
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        with torch.inference_mode():
            out = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=temperature,
                top_p=top_p,
                pad_token_id=self.tokenizer.eos_token_id,
            )
        # Slice off the prompt tokens by position instead of string-matching
        # the decoded text, which can fail when tokenization does not
        # round-trip exactly.
        new_tokens = out[0][inputs["input_ids"].shape[1]:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
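
A minimal usage sketch, not part of the commit itself; it assumes hf_model_adapter.py is on the import path and that downloading the default StableLM checkpoint is acceptable:

# Hypothetical usage (illustration only, not part of this file):
from hf_model_adapter import HFLocalModelAdapter

adapter = HFLocalModelAdapter()  # downloads the default checkpoint on first run
completion = adapter.generate("Explain beam search in one paragraph:", max_new_tokens=120)
print(completion)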