Spaces · keungliang committed: Update app.py
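This commit strips app.py from a roughly 282-line OpenAI-compatible Flask proxy down to a single 71-line stream_response generator. The Markdown segmentation helpers (parse_markdown_content, chunk_paragraph_by_list_item) are removed in favor of forwarding each upstream content chunk unchanged; a buffer variable is added and flushed before the closing chunk when [ANSWER_DONE] arrives; and the non-streaming normal_response path, the /hf/v1/models and /hf/v1/chat/completions routes, and the __main__ launcher are all deleted.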
app.py CHANGED
@@ -1,124 +1,18 @@
-from flask import Flask, request, Response, json
-import requests
-from uuid import uuid4
-import time
-import os
-import re
-from flask_cors import CORS
-
-app = Flask(__name__)
-CORS(app)  # Enable CORS support
-
-# Read the API key from an environment variable
-API_KEY = os.environ.get('API_KEY')
-if not API_KEY:
-    raise ValueError("API_KEY environment variable is required")
-
-MODEL_MAPPING = {
-    "deepseek": "deepseek/deepseek-chat",
-    "gpt-4o-mini": "openai/gpt-4o-mini",
-    "gemini-flash-1.5": "google/gemini-flash-1.5",
-    "deepseek-reasoner": "deepseek-reasoner",
-    "minimax-01": "minimax/minimax-01"
-}
-
-def verify_api_key():
-    auth_header = request.headers.get('Authorization')
-    if not auth_header:
-        return False
-    try:
-        # Support the Bearer token format
-        if auth_header.startswith('Bearer '):
-            token = auth_header.split(' ')[1]
-        else:
-            token = auth_header
-        return token == API_KEY
-    except:
-        return False
-
-def make_heck_request(question, session_id, messages, actual_model):
-    previous_question = previous_answer = None
-    if len(messages) >= 2:
-        for i in range(len(messages)-2, -1, -1):
-            if messages[i]["role"] == "user":
-                previous_question = messages[i]["content"]
-                if i+1 < len(messages) and messages[i+1]["role"] == "assistant":
-                    previous_answer = messages[i+1]["content"]
-                break
-
-    payload = {
-        "model": actual_model,
-        "question": question,
-        "language": "Chinese",
-        "sessionId": session_id,
-        "previousQuestion": previous_question,
-        "previousAnswer": previous_answer
-    }
-
-    headers = {
-        "Content-Type": "application/json",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
-    }
-
-    return requests.post(
-        "https://gateway.aiapilab.com/api/ha/v1/chat",
-        json=payload,
-        headers=headers,
-        stream=True
-    )
-
-# The two helpers below emit output in segments to preserve line breaks, lists, and other Markdown formatting
-def parse_markdown_content(content):
-    """
-    Split into paragraphs on blank lines first, then detect list items (such as "1. ", "2. ")
-    inside each paragraph and split them out so they can be emitted segment by segment.
-    """
-    paragraph_regex = re.compile(r'(?:\r?\n){2,}')  # two or more consecutive newlines act as the separator
-    paragraphs = paragraph_regex.split(content)
-
-    for paragraph in paragraphs:
-        # Split further on list items (digit plus dot)
-        yield from chunk_paragraph_by_list_item(paragraph)
-
-def chunk_paragraph_by_list_item(paragraph):
-    """
-    Separate the list items ("1. ", "2. ", etc.) in a paragraph from the rest of the text and yield them piece by piece.
-    """
-    list_item_regex = re.compile(r'(^|\n)\s*\d+\.\s+')
-    last_index = 0
-
-    for match in list_item_regex.finditer(paragraph):
-        # Emit the text before the list marker first
-        if match.start() > last_index:
-            text_before = paragraph[last_index:match.start()]
-            if text_before.strip():
-                yield text_before + "\n"
-        # Emit the list marker itself (e.g. "1. ")
-        yield match.group(0)
-        last_index = match.end()
-
-    # Emit any remaining trailing text
-    if last_index < len(paragraph):
-        yield paragraph[last_index:] + "\n"
-
-    # Append one more blank line at the end of the paragraph
-    yield "\n"
-
 def stream_response(question, session_id, messages, request_model, actual_model):
     resp = make_heck_request(question, session_id, messages, actual_model)
     is_answering = False
+    buffer = ""
 
     for line in resp.iter_lines():
         if line:
             line = line.decode('utf-8')
             if not line.startswith('data: '):
                 continue
 
             content = line[6:].strip()
 
             if content == "[ANSWER_START]":
                 is_answering = True
-                # Send the role declaration chunk first
                 chunk = {
                     "id": session_id,
                     "object": "chat.completion.chunk",
@@ -131,9 +25,23 @@ def stream_response(question, session_id, messages, request_model, actual_model)
                 }
                 yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
                 continue
 
             if content == "[ANSWER_DONE]":
+                # If the buffer still holds content, flush it first
+                if buffer:
+                    chunk = {
+                        "id": session_id,
+                        "object": "chat.completion.chunk",
+                        "created": int(time.time()),
+                        "model": request_model,
+                        "choices": [{
+                            "index": 0,
+                            "delta": {"content": buffer},
+                        }]
+                    }
+                    yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
+
+                # End-of-stream marker
                 chunk = {
                     "id": session_id,
                     "object": "chat.completion.chunk",
@@ -147,136 +55,17 @@ def stream_response(question, session_id, messages, request_model, actual_model)
                 }
                 yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
                 break
 
-            # Once we are sure we are in the answering phase and the content is not a system marker such as [RELATE_Q...], emit it in segments
             if is_answering and content and not content.startswith("[RELATE_Q"):
-                for sub_content in parse_markdown_content(content):
-                    chunk = {
-                        "id": session_id,
-                        "object": "chat.completion.chunk",
-                        "created": int(time.time()),
-                        "model": request_model,
-                        "choices": [{
-                            "index": 0,
-                            "delta": {"content": sub_content},
-                        }]
-                    }
-                    yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
+                # Emit the content directly, without any extra formatting pass
+                chunk = {
+                    "id": session_id,
+                    "object": "chat.completion.chunk",
+                    "created": int(time.time()),
+                    "model": request_model,
+                    "choices": [{
+                        "index": 0,
+                        "delta": {"content": content},
+                    }]
+                }
+                yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
-
-def normal_response(question, session_id, messages, request_model, actual_model):
-    resp = make_heck_request(question, session_id, messages, actual_model)
-    full_content = []
-    is_answering = False
-
-    for line in resp.iter_lines():
-        if line:
-            line = line.decode('utf-8')
-            if line.startswith('data: '):
-                content = line[6:].strip()
-                if content == "[ANSWER_START]":
-                    is_answering = True
-                elif content == "[ANSWER_DONE]":
-                    break
-                elif is_answering:
-                    full_content.append(content)
-
-    response = {
-        "id": session_id,
-        "object": "chat.completion",
-        "created": int(time.time()),
-        "model": request_model,
-        "choices": [{
-            "index": 0,
-            "message": {
-                "role": "assistant",
-                "content": "".join(full_content)
-            },
-            "finish_reason": "stop"
-        }]
-    }
-    return response
-
-@app.route("/hf/v1/models", methods=["GET"])
-def list_models():
-    models = []
-    for model_id, _ in MODEL_MAPPING.items():
-        models.append({
-            "id": model_id,
-            "object": "model",
-            "created": int(time.time()),
-            "owned_by": "heck",
-        })
-
-    return {
-        "object": "list",
-        "data": models
-    }
-
-@app.route("/hf/v1/chat/completions", methods=["POST"])
-def chat_completions():
-    # Validate the API key
-    if not verify_api_key():
-        return {"error": "Invalid API Key"}, 401
-
-    data = request.json
-
-    if not data or "model" not in data:
-        return {"error": "Invalid request - missing model"}, 400
-
-    if not data.get("messages"):
-        return {"error": "Invalid request - missing messages"}, 400
-
-    # Validate the message format
-    for msg in data["messages"]:
-        if not isinstance(msg, dict):
-            return {"error": "Invalid message format"}, 400
-        if "role" not in msg or "content" not in msg:
-            return {"error": "Invalid message format"}, 400
-
-        # Check the content type
-        if isinstance(msg["content"], list):
-            # If content is a list, make sure each element has a text field
-            for item in msg["content"]:
-                if not isinstance(item, dict) or "text" not in item:
-                    return {"error": "Invalid content format"}, 400
-            # Extract and join all text fields
-            msg["content"] = " ".join(item["text"] for item in msg["content"])
-        elif not isinstance(msg["content"], str):
-            return {"error": "Invalid content type"}, 400
-
-    model = MODEL_MAPPING.get(data["model"])
-    if not model:
-        return {"error": "Unsupported Model"}, 400
-
-    try:
-        question = next((msg["content"] for msg in reversed(data["messages"])
-                         if msg["role"] == "user"), None)
-    except Exception as e:
-        return {"error": "Failed to extract question"}, 400
-
-    if not question:
-        return {"error": "No user message found"}, 400
-
-    session_id = str(uuid4())
-
-    try:
-        if data.get("stream"):
-            return Response(
-                stream_response(question, session_id, data["messages"],
-                                data["model"], model),
-                mimetype="text/event-stream"
-            )
-        else:
-            return normal_response(question, session_id, data["messages"],
-                                   data["model"], model)
-    except Exception as e:
-        return {"error": f"Internal server error: {str(e)}"}, 500
-
-if __name__ == "__main__":
-    # Get the port from an environment variable, defaulting to 7860 (the HF Spaces default)
-    port = int(os.environ.get("PORT", 7860))
-    app.run(host='0.0.0.0', port=port)
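Both the old and the new stream_response parse the same upstream SSE protocol: [ANSWER_START] opens the answer, [ANSWER_DONE] closes it, and [RELATE_Q...] lines (apparently related-question suggestions) are skipped. Here is a minimal standalone sketch of that loop; the helper name extract_answer is mine, and the demo lines are canned, not captured from the real gateway:

def extract_answer(sse_lines):
    # Mirrors the marker handling in stream_response / normal_response
    is_answering = False
    for raw in sse_lines:
        if not raw.startswith("data: "):
            continue
        content = raw[6:].strip()
        if content == "[ANSWER_START]":
            is_answering = True
        elif content == "[ANSWER_DONE]":
            break
        elif is_answering and content and not content.startswith("[RELATE_Q"):
            yield content

demo = [
    "data: [ANSWER_START]",
    "data: Hello",
    "data: world",
    "data: [RELATE_Q1]Anything else?",
    "data: [ANSWER_DONE]",
]
print(list(extract_answer(demo)))  # -> ['Hello', 'world']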
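Finally, a sketch of how a client consumed the /hf/v1/chat/completions route that this commit removes. The base URL is an assumption taken from the deleted __main__ block (HF Spaces' default port 7860); everything else follows the removed route's contract:

import json
import os

import requests

BASE_URL = "http://localhost:7860"  # assumed; matches the removed default port

resp = requests.post(
    f"{BASE_URL}/hf/v1/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['API_KEY']}"},
    json={
        "model": "deepseek",  # any key from MODEL_MAPPING
        "stream": True,
        "messages": [{"role": "user", "content": "Hello"}],
    },
    stream=True,
)

# Each SSE line is "data: <chat.completion.chunk JSON>"
for raw in resp.iter_lines():
    if not raw:
        continue
    line = raw.decode("utf-8")
    if line.startswith("data: "):
        chunk = json.loads(line[6:])
        delta = chunk["choices"][0].get("delta", {})
        print(delta.get("content", ""), end="", flush=True)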