Commit 6b79d5e · Parent(s): 09c5a80

llama good

app.py CHANGED
@@ -77,7 +77,7 @@ def add_docs(user_id: str, docs: list[str]) -> int:
     )
     return len(docs)
 # ----- Qwen-chat prompt helper ---------------------------------------------
-def build_qwen_prompt(system: str, context: list[str], user_question: str) -> str:
+def build_llm_prompt(system: str, context: list[str], user_question: str) -> str:
     """Return a Qwen-style prompt with multiple context items."""
     load_chat()  # make sure the tokenizer is loaded
 
@@ -93,12 +93,19 @@ def build_qwen_prompt(system: str, context: list[str], user_question: str) -> st
     # append the final question
     conversation.append({"role": "user", "content": user_question})
 
-
-
-
+    prompt = ""
+    for turn in conversation:
+        role = turn["role"]
+        content = turn["content"].strip()
+        if role == "system":
+            prompt += f"<<SYS>>\n{content}\n<</SYS>>\n\n"
+        elif role == "user":
+            prompt += f"[INST] {content.strip()} [/INST]\n"
+        elif role == "assistant":
+            prompt += f"{content.strip()}\n"
+    return prompt
 
 # ---------- 4. Gradio playground (same UI as before) --------------------------
-# ---------- 4. Gradio playground ------------------------------------------
 def store_doc(doc_text: str, user_id="demo"):
     """UI callback: take the textbox content and shove it into the KB."""
     try:
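(For reference, a quick sketch of what the new formatting loop emits. The lines that build `conversation` from `system` and the context items sit above this hunk and are not shown, so the exact turn layout below is an assumption; the rendered output follows directly from the loop added here.)

    # Hypothetical input, for illustration only; the real `conversation`
    # is assembled earlier in build_llm_prompt from system/context/question.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What colour is the sky?"},
    ]
    # The loop renders this conversation as:
    # <<SYS>>
    # You are a helpful assistant.
    # <</SYS>>
    #
    # [INST] What colour is the sky? [/INST]

Note the markup switch this commit makes: the old code targeted Qwen's ChatML tags (the removed /query handler below still splits generated text on "<|im_start|>assistant"), while the new loop emits Llama-2-style [INST]/<<SYS>> tags, consistent with the commit message "llama good". In the reference Llama-2 chat format the <<SYS>> block sits inside the first [INST]; emitting it separately, as here, is a simplification.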
@@ -137,7 +144,7 @@ def answer(system: str, context: str, question: str, user_id="demo", history="No
     context_list += store["texts"]
 
     # 2. Build a Qwen-chat prompt (helper defined earlier)
-    prompt = build_qwen_prompt(system, context_list, question)
+    prompt = build_llm_prompt(system, context_list, question)
 
     # 3. Tokenise & cap
     load_chat()
@@ -211,62 +218,6 @@ with gr.Blocks() as demo:
         outputs=answer_box
     )
 
-# ---------- 3. FastAPI layer --------------------------------------------------
-class IngestReq(BaseModel):
-    user_id:str
-    docs:list[str]
-
-class QueryReq(BaseModel):
-    user_id:str
-    question:str
-
-api = FastAPI()
-api = gr.mount_gradio_app(api, demo, path="/")
-
-@api.post("/ingest")
-def ingest(req:IngestReq):
-    load_embedder()
-    vecs = torch.stack([embed(t) for t in req.docs])
-    store = kb.setdefault(req.user_id, {"texts":[], "vecs":None})
-    store["texts"].extend(req.docs)
-    store["vecs"] = vecs if store["vecs"] is None else torch.cat([store["vecs"], vecs])
-    return {"added": len(req.docs)}
-
-@api.post("/query")
-def rag(req: QueryReq):
-    store = kb.get(req.user_id)
-    if not store:
-        raise HTTPException(404, "No knowledge ingested for this user.")
-
-    q_vec = embed(req.question)
-    sims = torch.matmul(store["vecs"], q_vec)
-    topk = torch.topk(sims, k=min(4, sims.size(0))).indices
-    context = "\n".join(store["texts"][i] for i in topk.tolist())
-
-    SYSTEM_PROMPT = "You are a helpful assistant."
-    prompt = build_qwen_prompt(SYSTEM_PROMPT, [context], req.question)
-
-    load_chat()
-    tokens = tokenizer(
-        prompt,
-        return_tensors="pt",
-        add_special_tokens=False,
-    )
-    if tokens["input_ids"].size(1) > MAX_PROMPT_TOKENS:
-        tokens = {k: v[:, -MAX_PROMPT_TOKENS:] for k, v in tokens.items()}
-
-    tokens = {k: v.to(chat_model.device) for k, v in tokens.items()}
-
-    out = chat_model.generate(
-        **tokens,
-        max_new_tokens=512,
-        max_length=MAX_PROMPT_TOKENS + 512,
-    )
-    full = tokenizer.decode(out[0], skip_special_tokens=True)
-    ans = full.split("<|im_start|>assistant")[-1].strip()
-    return {"answer": ans}
-
-
 # ---------- 5. run both (FastAPI + Gradio) -----------------------------------
 if __name__ == "__main__":
     # launch Gradio on a background thread
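(For context, the block removed above exposed the in-memory knowledge base over HTTP. A client would have exercised it roughly as below; this is a sketch, and the base URL is an assumption since the run block that binds host and port is truncated in this view.)

    import requests

    BASE = "http://localhost:7860"  # assumed host/port

    # Add documents for a user, then query them. Request and response
    # shapes follow the removed IngestReq/QueryReq models and the
    # handlers' return values ({"added": n} and {"answer": ans}).
    requests.post(f"{BASE}/ingest",
                  json={"user_id": "demo", "docs": ["Qwen is a family of LLMs."]})
    r = requests.post(f"{BASE}/query",
                      json={"user_id": "demo", "question": "What is Qwen?"})
    print(r.json()["answer"])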