Spaces:

kodetr
/

stunting-llm

Sleeping

kodetr committed 18 days ago

Commit

9e26a79

verified ·

1 Parent(s): 39fb517

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -69,6 +69,18 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
 # -------------------------------------
 # ------- use model stunting V5 -------
 # -------------------------------------
     terminators = [
         text_pipeline.tokenizer.eos_token_id,
@@ -77,7 +89,7 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
     # Hasil dari pipeline akan berupa list dengan dictionary berisi text
     outputs = text_pipeline(
-        conversation,
         max_new_tokens=max_new_tokens,
         eos_token_id=terminators,
         do_sample=True,
@@ -87,10 +99,12 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
         repetition_penalty=penalty
     )
-    # Karena pipeline tidak support streaming per token, kita bisa stream per kalimat atau per paragraf
-    full_text = outputs[0]["generated_text"]
     buffer = ""
-    for part in full_text.split(". "):  # Stream berdasarkan kalimat
         buffer += part.strip() + ". "
         yield buffer

 # -------------------------------------
 # ------- use model stunting V5 -------
 # -------------------------------------
+    # Ubah ke format prompt-style string
+    conversation_text = ""
+    for turn in conversation:
+        role = turn["role"]
+        content = turn["content"]
+        if role == "system":
+            conversation_text += f"[SYSTEM]: {content}\n"
+        elif role == "user":
+            conversation_text += f"[USER]: {content}\n"
+        elif role == "assistant":
+            conversation_text += f"[ASSISTANT]: {content}\n"
     terminators = [
         text_pipeline.tokenizer.eos_token_id,
     # Hasil dari pipeline akan berupa list dengan dictionary berisi text
     outputs = text_pipeline(
+        conversation_text,
         max_new_tokens=max_new_tokens,
         eos_token_id=terminators,
         do_sample=True,
         repetition_penalty=penalty
     )
+    # 4. Ekstrak teks hasil dan stream per kalimat
+    generated_text = outputs[0].get("generated_text", "")
+    streamed_text = generated_text[len(conversation_text):].strip()  # Hilangkan prompt awal
     buffer = ""
+    for part in streamed_text.split(". "):
         buffer += part.strip() + ". "
         yield buffer