Spaces:

Vanhwbt
/

API_chatbot

Runtime error

Vanhwbt committed on Feb 13

Commit

f0df364

1 Parent(s): 88ca24a

update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,18 +1,26 @@
 import os
-from transformers import pipeline
 import gradio as gr
-# Bring in the Gemma model to do the work
-# Note: Gemma-2b-it is the variant that has already been trained for chat
-model_id = "google/gemma-2b-it"
-pipe = pipeline("text-generation", model=model_id, device_map="auto")
-def chat_with_gemma(message, history):
-    # Format the message the way Gemma expects
-    prompt = f"User: {message}\nAI:"
-    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
-    return outputs[0]["generated_text"].split("AI:")[1].strip()  # NOTE(review): keeps only the text between the first and second "AI:"; presumably generated_text echoes the prompt — confirm
-# Create the UI and the API automatically
-demo = gr.ChatInterface(fn=chat_with_gemma, title="Gemma Backend của Thầy Oáp")
 demo.launch()

 import os
 import gradio as gr
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+# This model is very lightweight and runs smoothly on a CPU-only HF Space.
+model_id = "Qwen/Qwen2.5-1.5B-Instruct"
+# Load the tokenizer once: it renders the chat template and is also handed to
+# the pipeline, so the pipeline does not load a second copy of it.
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    tokenizer=tokenizer,
+    torch_dtype="auto",
+    device_map="auto",
+)
+def chat(message, history):
+    """Generate the assistant's reply to `message` with the Qwen pipeline.
+
+    Args:
+        message: The latest user message, passed in by gr.ChatInterface.
+        history: Earlier turns supplied by gr.ChatInterface. Both the
+            "messages" format (dicts with "role"/"content") and the legacy
+            "tuples" format ([user, assistant] pairs) are accepted.
+
+    Returns:
+        The newly generated assistant text, without the prompt.
+    """
+    messages = []
+    for turn in history or []:
+        if isinstance(turn, dict):
+            past = [(turn.get("role"), turn.get("content"))]
+        else:
+            user_text, bot_text = turn
+            past = [("user", user_text), ("assistant", bot_text)]
+        # Only plain-text turns can go into the chat template; skip files/None.
+        messages.extend(
+            {"role": role, "content": content}
+            for role, content in past
+            if role and isinstance(content, str)
+        )
+    messages.append({"role": "user", "content": message})
+    # Render the conversation in the standard chat format for Qwen.
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    outputs = pipe(
+        prompt,
+        max_new_tokens=512,
+        do_sample=True,
+        temperature=0.7,
+        # Ask for the completion only, instead of cutting the prompt off by
+        # searching for the "<|im_start|>assistant\n" marker afterwards.
+        return_full_text=False,
+    )
+    return outputs[0]["generated_text"]
+# Wrap `chat` in a Gradio chat interface, then start serving it.
+demo = gr.ChatInterface(
+    fn=chat,
+    title="Gemma thì chậm, Qwen thì đậm chất chơi!",
+)
 demo.launch()