Spaces:

hackergeek
/

CBT

Runtime error

App Files Files Community

hackergeek committed on Mar 21

Commit

f086418

verified ·

1 Parent(s): 0a5d97f

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -34

app.py CHANGED Viewed

@@ -2,15 +2,18 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-# Load your fine-tuned model and tokenizer
 model = AutoModelForCausalLM.from_pretrained(
     "hackergeek/gemma-finetuned",
-    torch_dtype=torch.float16,
-    device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained("hackergeek/gemma-finetuned")
 tokenizer.pad_token = tokenizer.eos_token
 def format_prompt(message, history):
     """Format the prompt with conversation history"""
     system_prompt = "You are a knowledgeable space expert assistant. Answer questions about astronomy, space exploration, and related topics in a clear and engaging manner."
@@ -26,55 +29,47 @@ def respond(message, history):
     # Format the prompt with conversation history
     full_prompt = format_prompt(message, history)
-    # Tokenize input
-    inputs = tokenizer(full_prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
-    # Generate response
     outputs = model.generate(
-        **inputs,
-        max_new_tokens=1024,
         temperature=0.7,
-        top_p=0.9,
         repetition_penalty=1.1,
-        do_sample=True
     )
-    # Decode and extract only the new response
     response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
     return response
-# Custom CSS for space theme
 space_css = """
-.gradio-container {
-    background: linear-gradient(45deg, #000000, #1a1a2e);
-    color: white;
-}
-.chatbot {
-    background-color: rgba(0, 0, 0, 0.7) !important;
-    border: 1px solid #4a4a4a !important;
-}
 """
-# Create the interface
-with gr.Blocks(css=space_css, theme=gr.themes.Default(primary_hue="blue", secondary_hue="purple")) as demo:
-    gr.Markdown("# 🚀 Space Explorer Chatbot 🌌")
-    gr.Markdown("Ask me anything about space! Planets, stars, galaxies, or space exploration!")
     chatbot = gr.ChatInterface(
         respond,
         examples=[
-            "Explain black holes in simple terms",
-            "What's the latest news about Mars exploration?",
-            "How do stars form?",
-            "Tell me about the James Webb Space Telescope"
         ],
-        retry_btn=None,
-        undo_btn=None,
-        clear_btn="Clear History",
     )
-    chatbot.chatbot.height = 600
 if __name__ == "__main__":
-    demo.launch(share=True)

 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+# Load model and tokenizer with CPU optimizations
 model = AutoModelForCausalLM.from_pretrained(
     "hackergeek/gemma-finetuned",
+    torch_dtype=torch.float32,  # Changed to float32 for CPU compatibility
+    device_map="cpu"            # Force CPU usage
 )
 tokenizer = AutoTokenizer.from_pretrained("hackergeek/gemma-finetuned")
 tokenizer.pad_token = tokenizer.eos_token
+# Explicitly move model to CPU (redundant but safe)
+model.to("cpu")
 def format_prompt(message, history):
     """Format the prompt with conversation history"""
     system_prompt = "You are a knowledgeable space expert assistant. Answer questions about astronomy, space exploration, and related topics in a clear and engaging manner."
     # Format the prompt with conversation history
     full_prompt = format_prompt(message, history)
+    # Tokenize input (keep on CPU)
+    inputs = tokenizer(full_prompt, return_tensors="pt", add_special_tokens=False)
+    # Generate response with CPU-friendly parameters
     outputs = model.generate(
+        input_ids=inputs.input_ids,
+        attention_mask=inputs.attention_mask,
+        max_new_tokens=512,        # Reduced for faster CPU processing
         temperature=0.7,
+        top_p=0.85,
         repetition_penalty=1.1,
+        do_sample=True,
+        no_repeat_ngram_size=2      # Added to reduce repetition
     )
+    # Decode response
     response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
     return response
+# Simplified CSS for better CPU rendering
 space_css = """
+.gradio-container { background: #000000; color: #ffffff; }
+.chatbot { background: #0a0a2a !important; }
 """
+with gr.Blocks(css=space_css) as demo:
+    gr.Markdown("# 🚀 CPU Space Chatbot 🌌")
+    gr.Markdown("Note: Responses may be slower due to CPU processing")
     chatbot = gr.ChatInterface(
         respond,
         examples=[
+            "What is a neutron star?",
+            "Explain the Big Bang theory",
+            "How do rockets work?",
+            "What's the temperature on Venus?"
         ],
+        clear_btn="Clear",
     )
+    chatbot.chatbot.height = 500
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)