import json
import mimetypes
import os

import gradio as gr
import requests

# API credentials/endpoint for the backing LLM service, injected via env vars.
LLM_API = os.environ.get("LLM_API")
LLM_URL = os.environ.get("LLM_URL")
USER_ID = "HuggingFace Space"  # Placeholder user ID


def send_chat_message(LLM_URL, LLM_API, category, file_id):
    """Send a streaming chat request that references a previously uploaded image.

    Parameters:
        LLM_URL: base URL of the chat API.
        LLM_API: bearer token for authorization.
        category: document category used as the chat query.
        file_id: upload id returned by ``upload_file``.

    Returns:
        The text of the last ``agent_thought`` event in the SSE stream,
        or an ``"Error: ..."`` string on failure.
    """
    payload = {
        "inputs": {},
        "query": category,
        "response_mode": "streaming",
        "conversation_id": "",
        "user": USER_ID,
        "files": [
            {
                "type": "image",
                "transfer_method": "local_file",
                "upload_file_id": file_id,
            }
        ],
    }
    print("Sending chat message payload:", payload)  # Debug information

    response = requests.post(
        f"{LLM_URL}/chat-messages",
        headers={"Authorization": f"Bearer {LLM_API}"},
        json=payload,
        stream=True,  # Enable streaming (server-sent events)
    )
    print("Request URL:", f"{LLM_URL}/chat-messages")
    print("Response status code:", response.status_code)
    if response.status_code == 404:
        return "Error: Endpoint not found (404)"

    # Walk the SSE stream and remember the most recent agent thought.
    last_thought = None
    try:
        for line in response.iter_lines(decode_unicode=True):
            if not line:
                continue
            # SSE data lines look like "data: {...}"; skip anything else.
            _, sep, body = line.partition("data: ")
            if not sep:
                continue
            try:
                data = json.loads(body)
            except json.JSONDecodeError:
                continue  # Non-JSON keep-alive or partial line; ignore.
            if data.get("event") == "agent_thought":
                last_thought = data.get("thought")
    # BUGFIX: iter_lines raises RequestException subclasses (e.g. a dropped
    # connection mid-stream), never JSONDecodeError; the old handler was dead.
    except requests.exceptions.RequestException:
        return "Error: Invalid JSON response"

    if last_thought:
        return last_thought.strip()
    return "Error: No thought found in the response"


def upload_file(LLM_URL, LLM_API, file_path, user_id):
    """Upload a local file to the API.

    Returns:
        The parsed JSON response (a dict with an ``"id"`` key on success),
        or an ``"Error: ..."`` string on failure.
    """
    if not os.path.exists(file_path):
        return f"Error: File {file_path} not found"

    mime_type, _ = mimetypes.guess_type(file_path)
    # guess_type may return None for unknown extensions; always send a type.
    mime_type = mime_type or "application/octet-stream"

    with open(file_path, 'rb') as f:
        response = requests.post(
            f"{LLM_URL}/files/upload",
            headers={"Authorization": f"Bearer {LLM_API}"},
            files={"file": (file_path, f, mime_type)},
            data={"user": user_id},
        )

    print("Upload response status code:", response.status_code)  # Debug information
    if response.status_code == 404:
        return "Error: Endpoint not found (404)"
    print("Raw upload response text:", response.text)  # Debug information
    try:
        return response.json()
    except requests.exceptions.JSONDecodeError:
        return "Error: Invalid JSON response"


def handle_input(file_path, category):
    """Gradio callback: upload the image, then ask the LLM to parse it."""
    upload_response = upload_file(LLM_URL, LLM_API, file_path, USER_ID)
    print("Upload response:", upload_response)  # Debug information

    # BUGFIX: upload_file signals failure with an "Error: ..." string rather
    # than a dict. The previous check (`"error" in upload_response`) was
    # case-sensitive and never matched "Error: ...", and execution then
    # crashed calling .get() on a str. Detect the string sentinel instead.
    if isinstance(upload_response, str):
        return upload_response

    file_id = upload_response.get("id")  # Extract file ID from the response
    if not file_id:
        return "Error: No file ID returned from upload"

    chat_response = send_chat_message(LLM_URL, LLM_API, category, file_id)
    print("Chat response:", chat_response)  # Debug information
    return chat_response


# Define Gradio interface
file_input = gr.Image(label='圖片上傳', type='filepath')
category = gr.Radio(
    label="Message Category",
    choices=["高鐵車票", "超商高鐵車票", "台鐵車票", "超商台鐵車票",
             "通行明細 (etag)", "QRCODE發票", "計程車乘車證明"],
)

examples = [
    ["DEMO/qrcode.jpg", 'QRCODE發票'],
    ['DEMO/mthsr.JPG', '超商高鐵車票'],
    ['DEMO/thsr.jpg', '高鐵車票'],
    ['DEMO/mtra.jpg', '超商台鐵車票'],
    ['DEMO/tra.JPG', '台鐵車票'],
    ['DEMO/taxi.jpg', '計程車乘車證明'],
    ['DEMO/etag.jpg', '通行明細 (etag)'],
]

TITLE = """

Large Multimodal Model (LMM) Playground 💬 輸入各種單據並選擇種類,解析得到各種關鍵資訊

"""
SUBTITLE = """

TonTon Huang Ph.D. @ 2024/06

"""
LINKS = """那些語音處理 (Speech Processing) 踩的坑 | 那些自然語言處理 (Natural Language Processing, NLP) 踩的坑 | 那些ASR和TTS可能會踩的坑 | 那些大模型開發會踩的坑 | 什麼是大語言模型,它是什麼?想要嗎?
用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據 | 基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析
"""

with gr.Blocks() as iface:
    gr.HTML(TITLE)
    gr.HTML(SUBTITLE)
    gr.HTML(LINKS)
    gr.Interface(
        fn=handle_input,
        inputs=[file_input, category],
        outputs="text",
        examples=examples,
        allow_flagging="never",
    )

iface.launch()