Commit 258e5e7 (verified) · committed by made1570
Parent: 22dbba3

Update app.py

Files changed (1):
  app.py (+17, -60)
app.py CHANGED
@@ -1,78 +1,35 @@
-import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
+import gradio as gr
 import os
 
-# Set the environment variable for debugging (you can remove this in production)
-os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
-
-# Load model and tokenizer
+# Model loading
 base_model_name = "adarsh3601/my_gemma_pt3"
-adapter_name = "your_adapter_name_here"  # Replace with actual adapter name if needed
-
+adapter_name = "adarsh3601/my_gemma3_pt"
 device = "cuda" if torch.cuda.is_available() else "cpu"
+auth_token = os.getenv("HF_AUTH_TOKEN")  # Make sure to set the Hugging Face token as an environment variable
 
-# Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=auth_token)
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
-    device_map="auto",  # Using device_map="auto" for automatic GPU assignment
-    torch_dtype=torch.float32,  # Switch to float32 to avoid precision issues
-    load_in_4bit=True  # This should still be set if your model supports it
+    device_map={"": device},
+    torch_dtype=torch.float16,
+    load_in_4bit=True,
+    use_auth_token=auth_token
 )
 
-# Load the adapter model
 model = PeftModel.from_pretrained(base_model, adapter_name)
 model.to(device)
 
-# Ensure the model is in evaluation mode
-model.eval()
-
-# Chat function with added input/output validation
+# Chat function
 def chat(message):
-    # Tokenize input message
     inputs = tokenizer(message, return_tensors="pt")
-
-    # Check if any input token contains NaN or Inf
-    if torch.any(torch.isnan(inputs['input_ids'])) or torch.any(torch.isinf(inputs['input_ids'])):
-        return "Input contains invalid values (NaN or Inf). Please check the input."
-
-    # Move tensors to the correct device
-    inputs = {k: v.to(device).half() for k, v in inputs.items()}  # Using half precision for performance
-
-    try:
-        # Generate response
-        outputs = model.generate(**inputs, max_new_tokens=150, do_sample=True)
-
-        # Check for NaNs or Infs in the output
-        if torch.any(torch.isnan(outputs)) or torch.any(torch.isinf(outputs)):
-            return "Model output contains invalid values (NaN or Inf). Please try again."
-
-        # Decode the response
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    except Exception as e:
-        # Catch any errors that occur during generation and return them
-        response = f"Unexpected error: {str(e)}"
-
+    inputs = {k: v.to(device).half() for k, v in inputs.items()}
+    outputs = model.generate(**inputs, max_new_tokens=150, do_sample=True)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 
-# Gradio interface for the chat
-import gradio as gr
-
-def gradio_interface():
-    with gr.Blocks() as demo:
-        gr.Markdown("## Chat with Gemma Model")
-
-        with gr.Row():
-            message_input = gr.Textbox(label="Input Message")
-            output = gr.Textbox(label="Model Response")
-
-        # Button to trigger the chat
-        button = gr.Button("Generate Response")
-        button.click(fn=chat, inputs=message_input, outputs=output)
-
-        demo.launch()
-
-if __name__ == "__main__":
-    gradio_interface()
+# Launch Gradio app
+iface = gr.Interface(fn=chat, inputs="text", outputs="text", title="Gemma Chatbot")
+iface.launch()
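
Note: as committed, the new app.py removes "import torch" but still calls torch.cuda.is_available() and passes torch_dtype=torch.float16, so the app would raise a NameError at startup. Two further issues carry over from the old version: model.to(device) is rejected by recent transformers releases for 4-bit bitsandbytes models loaded with load_in_4bit=True, and casting the tokenizer output with .half() converts input_ids to float16, which typically fails the embedding lookup inside generate(). The sketch below is one possible correction, not the committed code; it keeps the commit's model and adapter ids and assumes bitsandbytes plus a CUDA device are available for 4-bit loading.

import torch
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

base_model_name = "adarsh3601/my_gemma_pt3"
adapter_name = "adarsh3601/my_gemma3_pt"
device = "cuda" if torch.cuda.is_available() else "cpu"
auth_token = os.getenv("HF_AUTH_TOKEN")  # export HF_AUTH_TOKEN in the Space/host environment

tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=auth_token)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map={"": device},
    torch_dtype=torch.float16,
    load_in_4bit=True,  # assumption: bitsandbytes is installed and a GPU is present
    use_auth_token=auth_token,
)
# device_map already places the quantized weights, so no .to(device) call here.
model = PeftModel.from_pretrained(base_model, adapter_name)
model.eval()

def chat(message):
    # Keep input_ids/attention_mask as integer tensors; only move them to the device.
    inputs = {k: v.to(device) for k, v in tokenizer(message, return_tensors="pt").items()}
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=150, do_sample=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

iface = gr.Interface(fn=chat, inputs="text", outputs="text", title="Gemma Chatbot")
iface.launch()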