Spaces:

DeepLearning101
/

Multimodal-Playground

Running

App Files Files Community

DeepLearning101 committed on Jun 15, 2024

Commit

60549f8

verified ·

1 Parent(s): 2080266

Create app.py

Browse files

Files changed (1) hide show

app.py +122 -0

app.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import gradio as gr
+import requests
+import mimetypes
+import json, os
+# API key for the LLM service, read from the LLM_API environment variable
+# (None when unset); sent as the Bearer token on every request below.
+LLM_API = os.environ.get("LLM_API")
+# Base URL of the LLM service, read from the LLM_URL environment variable
+# (None when unset); endpoint paths are appended to it.
+LLM_URL = os.environ.get("LLM_URL")
+USER_ID = "HuggingFace Space"  # Placeholder user ID
+def send_chat_message(LLM_URL, LLM_API, category, file_id):
+    payload = {
+        "inputs": {},
+        "query": category,
+        "response_mode": "streaming",
+        "conversation_id": "",
+        "user": USER_ID,
+        "files": [
+            {
+                "type": "image",
+                "transfer_method": "local_file",
+                "upload_file_id": file_id
+            }
+        ]
+    }
+    print("Sending chat message payload:", payload)  # Debug information
+    response = requests.post(
+        f"{LLM_URL}/chat-messages",
+        headers={"Authorization": f"Bearer {LLM_API}"},
+        json=payload,
+        stream=True  # Enable streaming
+    )
+    print("Request URL:", f"{LLM_URL}/chat-messages")
+    print("Response status code:", response.status_code)
+    if response.status_code == 404:
+        return "Error: Endpoint not found (404)"
+    # Handle the stream of events
+    last_thought = None
+    try:
+        for line in response.iter_lines(decode_unicode=True):
+            if line:
+                try:
+                    data = json.loads(line.split("data: ")[1])
+                    if data.get("event") == "agent_thought":
+                        last_thought = data.get("thought")
+                except (IndexError, json.JSONDecodeError):
+                    continue
+    except requests.exceptions.JSONDecodeError:
+        return "Error: Invalid JSON response"
+    if last_thought:
+        # Structure the thought text
+        return last_thought.strip()
+    else:
+        return "Error: No thought found in the response"
+def upload_file(LLM_URL, LLM_API, file_path, user_id):
+    if not os.path.exists(file_path):
+        return f"Error: File {file_path} not found"
+    mime_type, _ = mimetypes.guess_type(file_path)
+    with open(file_path, 'rb') as f:
+        response = requests.post(
+            f"{LLM_URL}/files/upload",
+            headers={"Authorization": f"Bearer {LLM_API}"},
+            files={"file": (file_path, f, mime_type)},
+            data={"user": user_id}
+        )
+    print("Upload response status code:", response.status_code)  # Debug information
+    if response.status_code == 404:
+        return "Error: Endpoint not found (404)"
+    print("Raw upload response text:", response.text)  # Debug information
+    try:
+        return response.json()
+    except requests.exceptions.JSONDecodeError:
+        return "Error: Invalid JSON response"
+def handle_input(file_path, category):
+    upload_response = upload_file(LLM_URL, LLM_API, file_path, USER_ID)
+    print("Upload response:", upload_response)  # Debug information
+    if "error" in upload_response:
+        return upload_response
+    file_id = upload_response.get("id")  # Extract file ID from the response
+    if not file_id:
+        return "Error: No file ID returned from upload"
+    chat_response = send_chat_message(LLM_URL, LLM_API, category, file_id)
+    print("Chat response:", chat_response)  # Debug information
+    return chat_response
+# Define Gradio interface
+# Image upload widget; type='filepath' is expected to hand the handler a path
+# string, which handle_input forwards to upload_file.
+file_input = gr.Image(label='圖片上傳', type='filepath')
+# Document category selector; the chosen label is sent verbatim as the chat query.
+category = gr.Radio(label="Message Category", choices=["高鐵車票", "超商高鐵車票", "台鐵車票", "超商台鐵車票", "通行明細 (etag)", "QRCODE發票", "計程車乘車證明"])
+# Example rows, each [image path, category], in the same order as the
+# Interface inputs [file_input, category].
+examples = [
+    ["DEMO/qrcode.jpg", 'QRCODE發票'],
+    ['DEMO/mthsr.JPG', '超商高鐵車票'],
+    ['DEMO/thsr.jpg', '高鐵車票'],
+    ['DEMO/mtra.jpg', '超商台鐵車票'],
+    ['DEMO/tra.JPG', '台鐵車票'],
+    ['DEMO/taxi.jpg', '計程車乘車證明'],
+    ['DEMO/etag.jpg', '通行明細 (etag)'],
+]
+# Static HTML fragments rendered with gr.HTML at the top of the page:
+# TITLE is the main heading, SUBTITLE the author/date line, and LINKS a
+# row of related blog-post links.
+TITLE = """<h1 align="center">Large Multimodal Model (LMM) Playground 💬 輸入各種單據並選擇種類，解析得到各種關鍵資訊 </h1>"""
+SUBTITLE = """<h2 align="center"><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/06 </a><br></h2>"""
+LINKS = """<a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a> | <a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a> | <a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型，它是什麼？想要嗎？</a><br>
+<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>"""
+with gr.Blocks() as iface:
+    gr.HTML(TITLE)
+    gr.HTML(SUBTITLE)
+    gr.HTML(LINKS)
+    gr.Interface(
+        fn=handle_input,
+        inputs=[file_input, category],
+        outputs="text",
+        examples=examples,
+        allow_flagging="never"
+    )
+iface.launch()