Spaces:

vaishakgkumar
/

stablemed2

Runtime error

vaishakgkumar committed on Dec 30, 2023

Commit

989f6d4

•

1 Parent(s): ed92110

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -68,31 +68,35 @@ class ChatBot:
     def __init__(self):
         self.history = []
-    def predict(self, user_input, system_prompt="You are an expert medical analyst trained on medical datatset:"):
-        # Combine user input and system prompt
-        formatted_input = f"{user_input}{system_prompt}"
-        # Encode user input
-        user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt")
-        # Concatenate the user input with chat history
-        if len(self.history) > 0:
-            chat_history_ids = torch.cat([self.history, user_input_ids], dim=-1)
-        else:
-            chat_history_ids = user_input_ids
-        # Generate a response using the PEFT model
-        response = peft_model.generate(input_ids=chat_history_ids, max_length=512, pad_token_id=tokenizer.eos_token_id)
-        # Update chat history
-        self.history = chat_history_ids
-        # Decode and return the response
-        response_text = tokenizer.decode(response[0], skip_special_tokens=True)
         return response_text
 bot = ChatBot()
 title = "StableDoc Chat"
 description = """
 You can use this Space to test out the current model vaishakgkumar/stablemedv3.

     def __init__(self):
         self.history = []
+def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
+        prompt = [{'role': 'user', 'content': user_input + "\n" + system_prompt + ":"}]
+        inputs = tokenizer.apply_chat_template(
+            prompt,
+            add_generation_prompt=True,
+            return_tensors='pt'
+        )
+        # Generate a response using the model
+        tokens = peft_model.generate(
+            inputs.to(model.device),
+            max_new_tokens=512,
+            temperature=0.8,
+            do_sample=False
+        )
+        # Decode the response
+        response_text = tokenizer.decode(tokens[0], skip_special_tokens=False)
+        # Free up memory
+        del tokens
+        torch.cuda.empty_cache()
         return response_text
 bot = ChatBot()
 title = "StableDoc Chat"
 description = """
 You can use this Space to test out the current model vaishakgkumar/stablemedv3.