vaishakgkumar committed on
Commit 2241485
1 Parent(s): 443561d

Update app.py

Files changed (1):
  1. app.py +35 -29
app.py CHANGED
@@ -11,44 +11,50 @@ from huggingface_hub import login
 hf_token = os.environ.get('HUGGINGFACE_TOKEN')
 login(hf_token)
 
-# Define the device
-device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained('stabilityai/stablelm-3b-4e1t',token=hf_token, trust_remote_code=True)
-config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1")
-model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t",token=hf_token, trust_remote_code=True)
-model = PeftModel.from_pretrained(model, "vaishakgkumar/stablemedv1",token=hf_token)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Use the base model's ID
+base_model_id = "stabilityai/stablelm-3b-4e1t"
+model_directory = "vaishakgkumar/stablemedv1"
+
+# Instantiate the Tokenizer
+tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True, padding_side="left")
+# tokenizer = AutoTokenizer.from_pretrained("Tonic/stablemed", trust_remote_code=True, padding_side="left")
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.padding_side = 'left'
 
-model.to(device)
+# Load the PEFT model
+peft_config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1", token=hf_token)
+peft_model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
+peft_model = PeftModel.from_pretrained(peft_model, "vaishakgkumar/stablemedv1", token=hf_token)
 
 class ChatBot:
     def __init__(self):
         self.history = []
 
-    def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
-        prompt = [{'role': 'user', 'content': user_input + "\n" + system_prompt + ":"}]
-        inputs = tokenizer.apply_chat_template(
-            prompt,
-            add_generation_prompt=True,
-            return_tensors='pt'
-        )
-
-        # Generate a response using the model
-        tokens = model.generate(
-            inputs.to(model.device),
-            max_new_tokens=250,
-            temperature=0.8,
-            do_sample=False
-        )
-
-        # Decode the response
-        response_text = tokenizer.decode(tokens[0], skip_special_tokens=False)
-
-        # Free up memory
-        del tokens
-        torch.cuda.empty_cache()
+    def predict(self, user_input, system_prompt="You are an expert medical analyst:"):
+        # Combine user input and system prompt
+        formatted_input = f"{user_input}{system_prompt}"
+
+        # Encode user input
+        user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt")
+
+        # Concatenate the user input with chat history
+        if len(self.history) > 0:
+            chat_history_ids = torch.cat([self.history, user_input_ids], dim=-1)
+        else:
+            chat_history_ids = user_input_ids
+
+        # Generate a response using the PEFT model
+        response = peft_model.generate(input_ids=chat_history_ids, max_length=1200, pad_token_id=tokenizer.eos_token_id)
+
+        # Update chat history
+        self.history = chat_history_ids
 
+        # Decode and return the response
+        response_text = tokenizer.decode(response[0], skip_special_tokens=True)
         return response_text
 
 bot = ChatBot()
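A few loose ends are visible in the new version: `device` is computed but the old `model.to(device)` call is dropped, `predict` concatenates `user_input` and `system_prompt` with no separator, and `self.history` stores only the prompt ids (never the generated reply) while `max_length=1200` caps the total sequence. The sketch below shows one way these could be tied up. It reuses the identifiers from the diff, but the `"\n"` delimiter, the `max_new_tokens` bound, and the reply-retaining history are illustrative assumptions, not part of the commit.

```python
# Illustrative sketch only (not part of the commit); identifiers follow app.py.
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from huggingface_hub import login

hf_token = os.environ.get('HUGGINGFACE_TOKEN')
login(hf_token)

device = "cuda" if torch.cuda.is_available() else "cpu"

base_model_id = "stabilityai/stablelm-3b-4e1t"
model_directory = "vaishakgkumar/stablemedv1"

tokenizer = AutoTokenizer.from_pretrained(base_model_id, token=hf_token, trust_remote_code=True, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token

peft_model = AutoModelForCausalLM.from_pretrained(base_model_id, token=hf_token, trust_remote_code=True)
peft_model = PeftModel.from_pretrained(peft_model, model_directory, token=hf_token)
peft_model.to(device)   # restores the device move the old version performed
peft_model.eval()

class ChatBot:
    def __init__(self):
        self.history = None   # token ids of the conversation so far

    def predict(self, user_input, system_prompt="You are an expert medical analyst:"):
        # Separate prompt and question explicitly (assumption: "\n" as delimiter)
        formatted_input = f"{system_prompt}\n{user_input}"
        user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt").to(device)

        # Prepend the accumulated history, if any
        if self.history is not None:
            input_ids = torch.cat([self.history, user_input_ids], dim=-1)
        else:
            input_ids = user_input_ids

        with torch.no_grad():
            output = peft_model.generate(
                input_ids=input_ids,
                max_new_tokens=250,   # bound new tokens rather than total length
                pad_token_id=tokenizer.eos_token_id,
            )

        # Keep prompt *and* reply, so the next turn sees the model's own answer
        self.history = output

        # Return only the newly generated portion
        return tokenizer.decode(output[0, input_ids.shape[-1]:], skip_special_tokens=True)

bot = ChatBot()
```

Because the history still grows without bound, the prompt would eventually exceed the model's context window; a production version would also truncate `self.history`, e.g. by keeping only the last N tokens before calling `generate`.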