enacimie committed
Commit 496f987 · verified · 1 Parent(s): defda42

Update app.py

Files changed (1)
  1. app.py +31 -19
app.py CHANGED
@@ -2,50 +2,62 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 
-# Cargar modelo y tokenizer (solo una vez)
+# Load model and tokenizer
 model_name = "LiquidAI/LFM2-350M"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 
-# Crear pipeline de generación
+# Define stopping criteria — stop at end of assistant turn
+stop_tokens = ["User:", "Assistant:", "\nUser", "\nAssistant"]
+
 pipe = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    device=0 if torch.cuda.is_available() else -1,  # Usa GPU si está disponible
-    max_new_tokens=128,
+    device=0 if torch.cuda.is_available() else -1,
+    max_new_tokens=64,  # More conservative
     do_sample=True,
     temperature=0.7,
-    top_p=0.9,
+    top_p=0.92,
+    pad_token_id=tokenizer.eos_token_id,
 )
 
 def chat_function(message, history):
-    # Formatear historial como prompt (simple concatenación)
+    # Build prompt with only last 3 exchanges to avoid confusion
    conversation = ""
-    for human, assistant in history:
+    for human, assistant in history[-3:]:  # Only keep last 3 turns
        conversation += f"User: {human}\nAssistant: {assistant}\n"
    conversation += f"User: {message}\nAssistant:"
 
-    # Generar respuesta
-    outputs = pipe(conversation, return_full_text=False)
+    # Generate
+    outputs = pipe(
+        conversation,
+        return_full_text=False,
+        max_new_tokens=64,
+        temperature=0.7,
+        top_p=0.92,
+        pad_token_id=tokenizer.eos_token_id,
+    )
    response = outputs[0]['generated_text'].strip()
 
-    # Limpiar posibles repeticiones o cortes
-    if "User:" in response:
-        response = response.split("User:")[0].strip()
-    if "Assistant:" in response:
-        response = response.split("Assistant:")[0].strip()
+    # Aggressive cleanup: stop at any unwanted token
+    for stop in stop_tokens:
+        if stop in response:
+            response = response.split(stop)[0].strip()
+
+    # Remove trailing punctuation or colons
+    response = response.rstrip(":").strip()
 
    return response
 
-# Interfaz Gradio
+# Gradio Interface
 with gr.Blocks(title="LFM2-350M Chat") as demo:
-    gr.Markdown("# 🤖 LFM2-350M Chat (Simple)")
-    gr.Markdown("Un chat simple usando el modelo `LiquidAI/LFM2-350M`. Ideal para pruebas en Hugging Face Spaces Free.")
+    gr.Markdown("# 🤖 LFM2-350M Simple Chat")
+    gr.Markdown("A minimal chat interface using `LiquidAI/LFM2-350M`. Optimized for clean single-turn responses.")
 
    chatbot = gr.Chatbot(height=400)
-    msg = gr.Textbox(label="Escribe tu mensaje", placeholder="Escribe algo...")
-    clear = gr.Button("Limpiar")
+    msg = gr.Textbox(label="Type your message", placeholder="Say something...")
+    clear = gr.Button("Clear")
 
    def respond(message, chat_history):
        bot_message = chat_function(message, chat_history)
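
For reference, a minimal standalone sketch of the cleanup step this commit introduces. The stop_tokens list is copied from the diff; the clean_response helper name and the sample completion are hypothetical, for illustration only:

# Sketch of the post-generation cleanup added in this commit.
# stop_tokens is copied from the diff; clean_response and the sample
# completion below are hypothetical, for illustration only.
stop_tokens = ["User:", "Assistant:", "\nUser", "\nAssistant"]

def clean_response(raw: str) -> str:
    # Truncate at the first stop token so the model cannot keep writing
    # the user's side of the dialogue, then drop a trailing colon left
    # by a cut-off turn label.
    response = raw.strip()
    for stop in stop_tokens:
        if stop in response:
            response = response.split(stop)[0].strip()
    return response.rstrip(":").strip()

raw = "The capital of France is Paris.\nUser: And Spain?\nAssistant: Madrid."
print(clean_response(raw))  # -> "The capital of France is Paris."

Splitting on the first stop token keeps the model from answering its own follow-up questions; the final rstrip(":") handles the case where generation is cut off right after emitting a bare turn label.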