skkjodhpur committed
Commit b519b92 · verified · 1 Parent(s): a270145
Files changed (1)
  1. app.py +15 -10
app.py CHANGED
@@ -2,29 +2,33 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
-# Load model and tokenizer
-model_name = "skkjodhpur/Gemma-Code-Instruct-Finetune-by-skk"
+# Load a smaller model and tokenizer
+model_name = "skkjodhpur/Gemma-Code-Instruct-Finetune-by-skk"  # Consider a smaller model if available
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
+model = AutoModelForCausalLM.from_pretrained(model_name)
 
-# Move model to GPU if available
-device = "cuda" if torch.cuda.is_available() else "cpu"
+# Move model to CPU
+device = "cpu"
 model = model.to(device)
 
 def generate_text(prompt):
     if not prompt.strip():
         return "Please enter a valid question."
-
+
     try:
+        # Tokenize input
         input_ids = tokenizer.encode(f"<s>[INST] {prompt} [/INST]", return_tensors="pt").to(device)
+
+        # Generate text with greedy search for faster response
         with torch.no_grad():
             output = model.generate(
                 input_ids,
-                max_length=200,
+                max_length=100,  # Reduced max length for faster generation
                 num_return_sequences=1,
-                do_sample=True,
-                temperature=0.7,
+                do_sample=False,  # Use greedy search
             )
+
+        # Decode and return the generated text
         generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
         return generated_text
     except Exception as e:
@@ -40,7 +44,8 @@ iface = gr.Interface(
     outputs="text",
     title="Doctors-Patient Chatbot",
     subtitle="Fine-Tuning GEMMA-2B for Doctor-Patient Interaction",
-    description="Ask me any question related to patient concerns. This model is designed for educational and informational purposes only. Please do not use it for medical diagnosis or treatment. Always consult a qualified healthcare provider for medical advice."
+    description="Ask me any question related to patient concerns. This model is designed for educational and informational purposes only. Please do not use it for medical diagnosis or treatment. Always consult a qualified healthcare provider for medical advice.",
+    allow_flagging="never",  # Disable flagging if not needed
 )
 
 iface.launch(share=True)
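For reference, the full app.py after this commit would read roughly as below. This is a sketch, not the verbatim file: the diff does not show lines 35–43 of the new file (the body of the except branch and the fn/inputs arguments of gr.Interface), so those parts are hypothetical fillers, marked as such in comments; all other lines come from the new side of the diff.

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load a smaller model and tokenizer
model_name = "skkjodhpur/Gemma-Code-Instruct-Finetune-by-skk"  # Consider a smaller model if available
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move model to CPU
device = "cpu"
model = model.to(device)

def generate_text(prompt):
    if not prompt.strip():
        return "Please enter a valid question."

    try:
        # Tokenize input
        input_ids = tokenizer.encode(f"<s>[INST] {prompt} [/INST]", return_tensors="pt").to(device)

        # Generate text with greedy search for faster response
        with torch.no_grad():
            output = model.generate(
                input_ids,
                max_length=100,  # Reduced max length for faster generation
                num_return_sequences=1,
                do_sample=False,  # Use greedy search
            )

        # Decode and return the generated text
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
        return generated_text
    except Exception as e:
        return f"An error occurred: {e}"  # hypothetical; the diff elides this branch's body

iface = gr.Interface(
    fn=generate_text,  # hypothetical wiring; not shown in the diff
    inputs="text",     # hypothetical; not shown in the diff
    outputs="text",
    title="Doctors-Patient Chatbot",
    subtitle="Fine-Tuning GEMMA-2B for Doctor-Patient Interaction",  # kept as in the commit; not a standard gr.Interface argument
    description="Ask me any question related to patient concerns. This model is designed for educational and informational purposes only. Please do not use it for medical diagnosis or treatment. Always consult a qualified healthcare provider for medical advice.",
    allow_flagging="never",  # Disable flagging if not needed
)

iface.launch(share=True)

Switching to do_sample=False trades response variety for determinism and speed on CPU: greedy decoding picks the highest-probability token at each step, so repeated prompts return identical answers.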