SimpleChatGradio-Ernie

Sleeping

App Files Files Community

enacimie committed on Sep 18

Commit

496f987

verified ·

1 Parent(s): defda42

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -19

app.py CHANGED Viewed

@@ -2,50 +2,62 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
-# Cargar modelo y tokenizer (solo una vez)
 model_name = "LiquidAI/LFM2-350M"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
-# Crear pipeline de generación
 pipe = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    device=0 if torch.cuda.is_available() else -1,  # Usa GPU si está disponible
-    max_new_tokens=128,
     do_sample=True,
     temperature=0.7,
-    top_p=0.9,
 )
 def chat_function(message, history):
-    # Formatear historial como prompt (simple concatenación)
     conversation = ""
-    for human, assistant in history:
         conversation += f"User: {human}\nAssistant: {assistant}\n"
     conversation += f"User: {message}\nAssistant:"
-    # Generar respuesta
-    outputs = pipe(conversation, return_full_text=False)
     response = outputs[0]['generated_text'].strip()
-    # Limpiar posibles repeticiones o cortes
-    if "User:" in response:
-        response = response.split("User:")[0].strip()
-    if "Assistant:" in response:
-        response = response.split("Assistant:")[0].strip()
     return response
-# Interfaz Gradio
 with gr.Blocks(title="LFM2-350M Chat") as demo:
-    gr.Markdown("# 🤖 LFM2-350M Chat (Simple)")
-    gr.Markdown("Un chat simple usando el modelo `LiquidAI/LFM2-350M`. Ideal para pruebas en Hugging Face Spaces Free.")
     chatbot = gr.Chatbot(height=400)
-    msg = gr.Textbox(label="Escribe tu mensaje", placeholder="Escribe algo...")
-    clear = gr.Button("Limpiar")
     def respond(message, chat_history):
         bot_message = chat_function(message, chat_history)

 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
+# Load model and tokenizer
 model_name = "LiquidAI/LFM2-350M"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
+# Marker strings used to truncate the reply after generation (not passed to the pipeline as stopping criteria)
+stop_tokens = ["User:", "Assistant:", "\nUser", "\nAssistant"]
 pipe = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
+    device=0 if torch.cuda.is_available() else -1,
+    max_new_tokens=64,  # Lower cap than the previous 128
     do_sample=True,
     temperature=0.7,
+    top_p=0.92,
+    pad_token_id=tokenizer.eos_token_id,
 )
 def chat_function(message, history):
+    # Build prompt with only last 3 exchanges to avoid confusion
     conversation = ""
+    for human, assistant in history[-3:]:  # Only keep last 3 turns
         conversation += f"User: {human}\nAssistant: {assistant}\n"
     conversation += f"User: {message}\nAssistant:"
+    # Generate
+    outputs = pipe(
+        conversation,
+        return_full_text=False,
+        max_new_tokens=64,
+        temperature=0.7,
+        top_p=0.92,
+        pad_token_id=tokenizer.eos_token_id,
+    )
     response = outputs[0]['generated_text'].strip()
+    # Aggressive cleanup: stop at any unwanted token
+    for stop in stop_tokens:
+        if stop in response:
+            response = response.split(stop)[0].strip()
+    # Remove trailing colons (only ':' is stripped, not other punctuation)
+    response = response.rstrip(":").strip()
     return response
+# Gradio Interface
 with gr.Blocks(title="LFM2-350M Chat") as demo:
+    gr.Markdown("# 🤖 LFM2-350M Simple Chat")
+    gr.Markdown("A minimal chat interface using `LiquidAI/LFM2-350M`. Optimized for clean single-turn responses.")
     chatbot = gr.Chatbot(height=400)
+    msg = gr.Textbox(label="Type your message", placeholder="Say something...")
+    clear = gr.Button("Clear")
     def respond(message, chat_history):
         bot_message = chat_function(message, chat_history)