Spaces:

reactallegany
/

promptlab

Runtime error

App Files Community

bditto committed on Apr 16

Commit

c31d961

verified ·

1 Parent(s): 9b32dd3

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -15

app.py CHANGED Viewed

@@ -1,18 +1,45 @@
 import gradio as gr
-import os
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, pipeline
 from threading import Thread
 import random
-# Local model setup 🤖
 model_name = "HuggingFaceH4/zephyr-7b-beta"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    device_map="auto",
-    load_in_4bit=True  # Reduces VRAM usage
 )
 # Safety tools 🛡️
 BLOCKED_WORDS = ["violence", "hate", "gun", "personal"]
 SAFE_IDEAS = [
@@ -20,7 +47,11 @@ SAFE_IDEAS = [
     "Code a game about recycling ♻️",
     "Plan an AI tool for school safety 🚸"
 ]
-safety_checker = pipeline("text-classification", model="unitary/toxic-bert")
 def is_safe(text):
     text = text.lower()
@@ -30,14 +61,12 @@ def is_safe(text):
     return not (result["label"] == "toxic" and result["score"] > 0.7)
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Safety check first 🔒
     if not is_safe(message):
         return f"🚫 Let's focus on positive projects! Try: {random.choice(SAFE_IDEAS)}"
-    # Prepare chat history
     messages = [{"role": "system", "content": system_message}]
-    for user_msg, bot_msg in history[-5:]:  # Keep last 5 exchanges
         if user_msg:
             messages.append({"role": "user", "content": user_msg})
         if bot_msg:
@@ -45,7 +74,6 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     messages.append({"role": "user", "content": message})
-    # Tokenize and prepare streaming
     inputs = tokenizer.apply_chat_template(
         messages,
         return_tensors="pt"
@@ -60,11 +88,9 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
         "streamer": streamer
     }
-    # Start generation in thread
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
-    # Stream output
     partial_message = ""
     for new_token in streamer:
         partial_message += new_token

 import gradio as gr
+import torch
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    TextIteratorStreamer,
+    pipeline,
+    BitsAndBytesConfig
+)
 from threading import Thread
 import random
+# Configuration 🛠️
 model_name = "HuggingFaceH4/zephyr-7b-beta"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Quantization setup
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True,
 )
+# Model loading with fallback
+try:
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        quantization_config=quantization_config if device == "cuda" else None,
+        device_map="auto",
+        torch_dtype=torch.float16 if device == "cuda" else torch.float32
+    )
+except Exception as e:
+    print(f"Error loading model with GPU: {e}")
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        device_map="cpu",
+        torch_dtype=torch.float32
+    )
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 # Safety tools 🛡️
 BLOCKED_WORDS = ["violence", "hate", "gun", "personal"]
 SAFE_IDEAS = [
     "Code a game about recycling ♻️",
     "Plan an AI tool for school safety 🚸"
 ]
+safety_checker = pipeline(
+    "text-classification",
+    model="unitary/toxic-bert",
+    device=0 if device == "cuda" else -1
+)
 def is_safe(text):
     text = text.lower()
     return not (result["label"] == "toxic" and result["score"] > 0.7)
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     if not is_safe(message):
         return f"🚫 Let's focus on positive projects! Try: {random.choice(SAFE_IDEAS)}"
     messages = [{"role": "system", "content": system_message}]
+    for user_msg, bot_msg in history[-5:]:
         if user_msg:
             messages.append({"role": "user", "content": user_msg})
         if bot_msg:
     messages.append({"role": "user", "content": message})
     inputs = tokenizer.apply_chat_template(
         messages,
         return_tensors="pt"
         "streamer": streamer
     }
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
     partial_message = ""
     for new_token in streamer:
         partial_message += new_token