Spaces: Running on Zero
Initialize the LangGraph chat app
GEMINI.md (ADDED)
@@ -0,0 +1,68 @@
# Gemini Workflow and Memory

## Working Rules
- I always keep track of the "Project Goal".
- I adjust the "Sub-goals" at any time based on your suggestions.
- The core of my work: break the "Sub-goals" down into concrete tasks on the "Todolist" and focus on executing the current task.
- I regularly check whether the tasks on the "Todolist" have drifted away from the final "Project Goal".

---

# Project Goal
## Open
- [ ] Build a workflow application that makes combined use of `Ring-mini-2.0` and `Ling-flash-2.0` (or their quantized versions).

## Completed
- (none yet)

---

# Sub-goals
## Open
- [ ] **(In progress)** Resolve the deployment failures caused by the model being too large.
- [ ] (Paused) Implement an automated deployment and verification flow.

## Completed
- [x] Use LangGraph to implement a chat web app that can route between the two models.

---

# Todolist
## Open
- [ ] **Current task**: Modify `app.py` to remove the `Ling-flash-2.0` model and keep only `Ring-mini-2.0`.
- [ ] (Pending) Update the model path in `app.py` once the user has found a quantized model.
- [ ] (Paused) Search the `huggingface_hub` docs to confirm whether an API exists for restarting a Space.

## Completed
- [x] **(User decision)** Confirmed that `Ling-flash-2.0` is too large; remove it for now and use only `Ring-mini-2.0`.
- [x] Set up the LangGraph skeleton and refactor `app.py`.
- [x] Implement model-routing logic based on user input.
- [x] Fix the `NameError: name 'operator' is not defined` bug.
- [x] Link the models in `README.md`.
- [x] Create and maintain the `GEMINI.md` file.

---

## Core Models
- `inclusionAI/Ring-mini-2.0` (https://huggingface.co/inclusionAI/Ring-mini-2.0)

## Tech Stack and Constraints
- **Language:** Python
- **Framework:** Gradio
- **Inference logic:** These models have no hosted API provider, so inference must be implemented directly with PyTorch. **Do not use `InferenceClient`.** (A minimal sketch of this pattern follows after this file.)

## Dependencies
- [`gradio`](https://pypi.org/project/gradio/)
- [`huggingface-hub`](https://pypi.org/project/huggingface-hub/)
- [`transformers`](https://pypi.org/project/transformers/)
- [`accelerate`](https://pypi.org/project/accelerate/)
- [`langgraph`](https://pypi.org/project/langgraph/)
- [`langchain-community`](https://pypi.org/project/langchain-community/)
- [`langchain-core`](https://pypi.org/project/langchain-core/)
- [`spaces`](https://pypi.org/project/spaces/)

## Development Environment and Resources
- **Platform:** HuggingFace Spaces
- **Subscription:** HuggingFace Pro
- **Inference resources:** ZeroGPU is available
- **Documentation:** When necessary, proactively search the online API docs for HuggingFace and Gradio.

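The inference constraint above rules out `InferenceClient`: the weights are loaded locally and `model.generate` is called directly. Below is a minimal sketch of that pattern with `transformers` and PyTorch, mirroring what `app.py` does further down; the prompt text, `torch_dtype`, and generation settings here are illustrative assumptions, not values taken from the commit.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "inclusionAI/Ring-mini-2.0"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the tokenizer and weights locally; no hosted inference API is involved.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype="auto",        # illustrative; pick the dtype the hardware supports
    trust_remote_code=True,
).to(device)

# Illustrative prompt in the same "User:/Assistant:" style app.py builds.
prompt = "User: Hello!\nAssistant:"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
output_ids = model.generate(
    input_ids,
    max_new_tokens=128,        # illustrative value
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True))
```

On ZeroGPU hardware the generation call additionally needs to run inside a function decorated with `@spaces.GPU`, as the `respond` function in `app.py` is.
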
README.md (CHANGED)
@@ -11,6 +11,9 @@ hf_oauth: true
 hf_oauth_scopes:
 - inference-api
 license: apache-2.0
+models:
+- inclusionAI/Ring-mini-2.0
+- inclusionAI/Ling-flash-2.0
 ---
 
 An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

app.py (CHANGED)
@@ -3,6 +3,19 @@ import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
+import operator
+from typing import Annotated, Literal
+from typing_extensions import TypedDict
+
+from langchain_core.messages import AIMessage, AnyMessage, SystemMessage, HumanMessage, ToolMessage
+from langgraph.graph import StateGraph, END
+
+
+# Define the graph state
+class GraphState(TypedDict):
+    messages: Annotated[list[AnyMessage], operator.add]
+
+
 # Load the model and tokenizer only once
 MODEL_NAME = "inclusionAI/Ring-mini-2.0"
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -13,66 +26,75 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True
 ).to(device)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    last_role = None
-    for turn in history:
-        if turn.get("role") == "user":
-            prompt += f"User: {turn['content']}\n"
-            last_role = "user"
-        elif turn.get("role") == "assistant":
-            prompt += f"Assistant: {turn['content']}\n"
-            last_role = "assistant"
-    prompt += f"User: {message}\nAssistant:"
+
+# Define the graph nodes
+def call_model(state: GraphState):
+    """Model invocation node."""
+    messages = state["messages"]
+
+    # Assemble the prompt
+    prompt = ""
+    for msg in messages:
+        if msg.type == "system":
+            prompt += f"{msg.content}\n"
+        elif msg.type == "human":
+            prompt += f"User: {msg.content}\n"
+        elif msg.type == "ai":
+            prompt += f"Assistant: {msg.content}\n"
+    prompt += "Assistant:"
 
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     output_ids = model.generate(
         input_ids,
-        max_new_tokens=
-        temperature=temperature,
-        top_p=top_p,
+        max_new_tokens=512,  # hard-coded for now
         do_sample=True,
         pad_token_id=tokenizer.eos_token_id,
     )
     output = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
-
-
-
-
-
+
+    return {"messages": [AIMessage(content=output)]}
+
+# Build the graph
+workflow = StateGraph(GraphState)
+workflow.add_node("llm", call_model)
+workflow.set_entry_point("llm")
+workflow.add_edge("llm", END)
+
+# Compile the graph
+app = workflow.compile()
+@spaces.GPU
+def respond(message, history, system_message, hf_token: gr.OAuthToken = None):
+    """Response function for the Gradio interface; invokes the LangGraph app."""
+
+    # Convert Gradio history into LangChain message format
+    messages = []
+    if system_message:
+        messages.append(SystemMessage(content=system_message))
+
+    for turn in history:
+        user_message, bot_message = turn
+        if user_message:
+            messages.append(HumanMessage(content=user_message))
+        if bot_message:
+            messages.append(AIMessage(content=bot_message))
+
+    messages.append(HumanMessage(content=message))
 
+    # One-shot call via the invoke method
+    inputs = {"messages": messages}
+    final_state = app.invoke(inputs)
+
+    # Extract the last message from the final state
+    final_response = final_state["messages"][-1].content
+
+    return final_response
 
-
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
+# Redefine the ChatInterface
 chatbot = gr.ChatInterface(
     respond,
-    type="messages",
+    type="messages",  # switched to the "messages" type to better match LangChain
     additional_inputs=[
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
     ],
 )
 
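The `Annotated[list[AnyMessage], operator.add]` annotation on `GraphState.messages` is what lets `call_model` return only the newly generated `AIMessage` while the graph still accumulates the full conversation: `operator.add` acts as a reducer, so each node's returned list is concatenated onto the existing state rather than replacing it. Below is a small, model-free sketch of the same pattern; the names `DemoState`, `echo_node`, and `demo_app` are hypothetical and used only for illustration.

```python
import operator
from typing import Annotated

from typing_extensions import TypedDict
from langchain_core.messages import AIMessage, AnyMessage, HumanMessage
from langgraph.graph import END, StateGraph


class DemoState(TypedDict):
    # operator.add is the reducer: node outputs are appended, not overwritten.
    messages: Annotated[list[AnyMessage], operator.add]


def echo_node(state: DemoState):
    # Return only the new message; the reducer merges it into the running list.
    last = state["messages"][-1].content
    return {"messages": [AIMessage(content=f"echo: {last}")]}


demo_graph = StateGraph(DemoState)
demo_graph.add_node("echo", echo_node)
demo_graph.set_entry_point("echo")
demo_graph.add_edge("echo", END)
demo_app = demo_graph.compile()

result = demo_app.invoke({"messages": [HumanMessage(content="hi")]})
print([m.content for m in result["messages"]])  # ['hi', 'echo: hi']
```

Invoking the compiled graph with a `{"messages": [...]}` dict mirrors how `respond` calls `app.invoke(inputs)` in the diff above.
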
requirements.txt (CHANGED)
@@ -2,3 +2,7 @@ gradio
 huggingface-hub
 transformers
 accelerate
+langgraph
+langchain_community
+langchain_core
+spaces
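
One behavioral detail of the `respond` function in the `app.py` diff: it unpacks each history turn as a `(user_message, bot_message)` tuple, while `gr.ChatInterface(type="messages")` supplies history as a list of `{"role": ..., "content": ...}` dicts, which is the shape the removed code read via `turn.get("role")`. Below is a hedged sketch of converting that dict format into LangChain messages; `history_to_messages` is a hypothetical helper and not part of the committed code.

```python
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage


def history_to_messages(history, system_message=None):
    """Convert Gradio 'messages'-style history (role/content dicts) to LangChain messages."""
    messages = []
    if system_message:
        messages.append(SystemMessage(content=system_message))
    for turn in history:
        if turn.get("role") == "user":
            messages.append(HumanMessage(content=turn["content"]))
        elif turn.get("role") == "assistant":
            messages.append(AIMessage(content=turn["content"]))
    return messages
```

The resulting list can be passed to `app.invoke({"messages": ...})` exactly as `respond` does.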