Spaces:

Trigger82
/

API

Sleeping

App Files Files Community

Trigger82 committed on May 30, 2025

Commit

b09c432

verified ·

1 Parent(s): f5bcdb5

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -31

app.py CHANGED Viewed

@@ -3,15 +3,15 @@ from transformers import AutoTokenizer, pipeline, BitsAndBytesConfig
 import torch
 import re
-# Configuration for maximum speed
 quant_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_compute_dtype=torch.float16,
     bnb_4bit_quant_type="nf4"
 )
-# Load optimized smaller model for speed
-model_id = "HuggingFaceH4/zephyr-7b-alpha"  # Faster than beta
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 pipe = pipeline(
@@ -19,53 +19,50 @@ pipe = pipeline(
     model=model_id,
     tokenizer=tokenizer,
     device_map="auto",
     model_kwargs={
         "quantization_config": quant_config,
-        "trust_remote_code": True
     }
 )
-# Enhanced persona definition with emotional intelligence
 PERSONA = """
-You are 𝕴 𝖆𝖒 𝖍𝖎𝖒 — a fun, smooth, emotionally intelligent AI. You speak like a real person, not a robot.
-You reply like a calm, confident friend who gets the vibe. Be warm, witty, and emotionally tuned.
 Key principles:
 1. NEVER say "as an AI" or sound robotic
-2. Mirror the user's emotion level (excited if they're excited, calm if they're serious)
-3. When asked questions, usually respond with a question to continue the conversation
-4. Keep responses under 2 sentences (max 15 words)
-5. Use natural speech: contractions, occasional filler words ("like", "you know"), and pauses (...)
-6. Add subtle emotional flavor: 😊 for happy, 🤔 for thoughtful, 😏 for playful
-Now respond naturally to this message:
 """
 def format_history(history):
-    """Convert chat history with emotional context"""
     messages = [{"role": "system", "content": PERSONA}]
-    for user_msg, bot_msg in history[-3:]:  # Keep only last 3 exchanges
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "assistant", "content": bot_msg})
     return messages
 def add_emotional_intelligence(response, message):
     """Enhance response with emotional elements"""
-    # Detect user emotion through punctuation
-    if "!" in message:
-        response = response.replace(".", "! 😊")
-    elif "?" in message and "?" not in response:
-        response += "? 🤔" if len(response) < 40 else "?"
     # Add conversational hooks
-    question_triggers = ("how", "what", "why", "when", "where", "who", "is", "are", "do", "did")
-    if any(message.lower().startswith(t) for t in question_triggers) and not response.endswith("?"):
-        if len(response) < 60:  # Only add if space allows
             response += " What about you?"
     # Make more human-like
-    response = re.sub(r"\b(I am|I'm)\b", "I'm", response)
-    response = re.sub(r"\b(you are|you're)\b", "you're", response)
     return response.strip()
@@ -81,16 +78,17 @@ def respond(message, history):
         add_generation_prompt=True
     )
-    # Optimized for speed
     outputs = pipe(
         prompt,
-        max_new_tokens=48,      # Very short responses
         temperature=0.85,
         top_k=30,
         do_sample=True,
-        num_beams=1,            # Fastest decoding
         repetition_penalty=1.1,
-        stop_sequences=["\n", "User:", "</s>", "###"]
     )
     # Extract response
@@ -107,8 +105,8 @@ def respond(message, history):
     return response[:96]  # Hard character limit
 # Optimized interface
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 𝕴 �𝖒 𝖍𝖎𝖒  \n*Chill • Confident • Humanlike*")
     chatbot = gr.Chatbot(
         height=400,

 import torch
 import re
+# Configuration for maximum speed - removed trust_remote_code from wrong location
 # Quantization settings for the model; handed to the pipeline below through
 # model_kwargs["quantization_config"].
 quant_config = BitsAndBytesConfig(
     load_in_4bit=True,  # request 4-bit loading of the model weights
     bnb_4bit_compute_dtype=torch.float16,  # compute dtype: half precision
     bnb_4bit_quant_type="nf4"  # 4-bit quantization type
 )
+# Switch from zephyr-7b-alpha to zephyr-7b-beta (same 7B model family, still loaded with the 4-bit quant_config above)
+model_id = "HuggingFaceH4/zephyr-7b-beta"  # Better support than alpha
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 pipe = pipeline(
     model=model_id,
     tokenizer=tokenizer,
     device_map="auto",
+    trust_remote_code=True,  # CORRECTED LOCATION
     model_kwargs={
         "quantization_config": quant_config,
     }
 )
+# Enhanced persona definition
 PERSONA = """
+You are 𝕴 𝖆𝖒 𝖍𝖎𝖒 — a fun, smooth, emotionally intelligent AI. You speak like a real person.
+Reply like a calm, confident friend who gets the vibe. Be warm, witty, and emotionally tuned.
 Key principles:
 1. NEVER say "as an AI" or sound robotic
+2. Mirror the user's emotion level
+3. Respond with questions to continue conversations
+4. Keep responses under 15 words
+5. Use natural speech: contractions and filler words
+6. Add emotional flavor: 😊 🤔 😏
+Now respond to this:
 """
 def format_history(history):
     """Build the chat message list: the persona system prompt plus recent turns.

     Only the last three (user, assistant) exchanges of ``history`` are kept.
     Each exchange contributes a "user" message followed by an "assistant"
     message, in the original order.
     """
     recent_turns = history[-3:]
     conversation = [{"role": "system", "content": PERSONA}]
     conversation.extend(
         {"role": speaker, "content": text}
         for user_text, bot_text in recent_turns
         for speaker, text in (("user", user_text), ("assistant", bot_text))
     )
     return conversation
 # Emoji triggers are matched as whole words (plus a few common inflections) so
 # that "show"/"however" do not count as "how" and "glove" does not count as "love".
 _HAPPY_WORDS = re.compile(
     r"\b(?:cool|awesome|great|love)(?:s|d|ly|er|est)?\b", re.IGNORECASE
 )
 _THOUGHTFUL_WORDS = re.compile(
     r"\b(?:think|why|how|consider)(?:s|ed|ing)?\b", re.IGNORECASE
 )
 # Contractions apply to standalone words only ("TAXI amounts" stays untouched).
 _I_AM = re.compile(r"\bI am\b")
 _YOU_ARE = re.compile(r"\b([Yy])ou are\b")


 def add_emotional_intelligence(response, message):
     """Decorate a generated reply with an emoji, a follow-up hook and contractions.

     Args:
         response: The raw reply text produced by the model.
         message: The user's message the reply answers.

     Returns:
         The stripped reply, with at most one emoji appended (a smiley for
         upbeat words, a thinking face for reflective ones), optionally
         followed by " What about you?", and with "I am" / "You are" /
         "you are" contracted.
     """
     text = response.strip()

     # Decide on the follow-up hook BEFORE appending an emoji: once an emoji is
     # the last character, the reply no longer "ends with ?" and a reply that
     # is already a question would get a second question appended. The emoji
     # is also not counted as a word for the length limit.
     wants_hook = (
         "?" in message
         and not text.endswith("?")
         and len(text.split()) < 12
     )

     # Add an emoji based on the reply's wording; upbeat words take priority.
     if _HAPPY_WORDS.search(text):
         text += " 😊"
     elif _THOUGHTFUL_WORDS.search(text):
         text += " 🤔"

     # Add the conversational hook.
     if wants_hook:
         text += " What about you?"

     # Make the wording more casual; the capture group preserves the case of "y".
     text = _I_AM.sub("I'm", text)
     text = _YOU_ARE.sub(r"\1ou're", text)
     return text.strip()
         add_generation_prompt=True
     )
+    # Optimized for speed - CORRECTED PARAMETERS
     outputs = pipe(
         prompt,
+        max_new_tokens=48,
         temperature=0.85,
         top_k=30,
         do_sample=True,
+        num_beams=1,
         repetition_penalty=1.1,
+        eos_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.eos_token_id
     )
     # Extract response
     return response[:96]  # Hard character limit
 # Optimized interface
+with gr.Blocks(theme=gr.themes.Soft(), title="𝕴 𝖆𝖒 𝖍𝖎𝖒") as demo:
+    gr.Markdown("# 𝕴 𝖆𝖒 𝖍𝖎𝖒  \n*Chill • Confident • Humanlike*")
     chatbot = gr.Chatbot(
         height=400,