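# NOTE: the six lines below look like page residue from a web file viewer, not part of the program.
# DeepLearning101's picture
# Update app.py
# f3c77fe verified
# raw
# history blame
# 5.66 kB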
import gradio as gr
import requests
import mimetypes
import json, os
import asyncio
import aiohttp
# Service settings come from the environment; each value is None when the
# variable is not set.
LLM_API = os.getenv("LLM_API")
LLM_URL = os.getenv("LLM_URL")

# Placeholder user ID sent along with every request.
USER_ID = "HuggingFace Space"
def _extract_agent_thought(line):
    """Return the non-empty ``thought`` text carried by one streamed line, or None.

    A line is only considered when it contains the ``data: `` marker followed
    by a JSON object whose ``event`` field equals ``"agent_thought"``.
    """
    # partition() splits on the FIRST marker only, so a JSON payload that
    # itself contains "data: " is kept whole instead of being truncated.
    _, marker, raw = line.partition(b"data: ")
    if not marker:
        return None
    try:
        event = json.loads(raw.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        return None
    # Valid JSON is not necessarily an object; only dicts carry event fields.
    if not isinstance(event, dict) or event.get("event") != "agent_thought":
        return None
    thought = event.get("thought")
    if isinstance(thought, str) and thought.strip():
        return thought
    return None


async def send_chat_message(LLM_URL, LLM_API, category, file_id):
    """Post a chat message that references an uploaded image and return the last thought.

    Args:
        LLM_URL: Base URL of the service; ``/chat-messages`` is appended.
        LLM_API: Token sent as ``Authorization: Bearer <token>``.
        category: Text sent as the ``query`` field of the payload.
        file_id: Identifier of a previously uploaded file, sent as
            ``upload_file_id``.

    Returns:
        The stripped text of the last non-empty ``agent_thought`` event found
        in the streamed response, or a string starting with ``"Error: "`` when
        the request fails or no such event is received.
    """
    payload = {
        "inputs": {},
        "query": category,
        "response_mode": "streaming",
        "conversation_id": "",
        "user": USER_ID,
        "files": [
            {
                "type": "image",
                "transfer_method": "local_file",
                "upload_file_id": file_id,
            }
        ],
    }
    print("Sending chat message payload:", payload)  # Debug information

    endpoint = f"{LLM_URL}/chat-messages"
    last_thought = None
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                endpoint,
                headers={"Authorization": f"Bearer {LLM_API}"},
                json=payload,
            ) as response:
                print("Request URL:", endpoint)
                print("Response status code:", response.status)
                if response.status == 404:
                    return "Error: Endpoint not found (404)"
                # Any other HTTP error would otherwise be reported as a
                # misleading "No thought found" further down.
                if response.status >= 400:
                    return f"Error: Chat request failed with status {response.status}"

                async for line in response.content:
                    thought = _extract_agent_thought(line)
                    # Keep the most recent thought that actually has text so a
                    # trailing empty event cannot erase a useful answer.
                    if thought is not None:
                        last_thought = thought
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        # Report transport failures in the same "Error: ..." style as the
        # other failure paths instead of raising.
        return f"Error: Chat request failed ({exc})"

    if last_thought:
        return last_thought.strip()
    return "Error: No thought found in the response"
async def upload_file(LLM_URL, LLM_API, file_path, user_id):
    """Upload a local file as multipart form data and return the parsed reply.

    Args:
        LLM_URL: Base URL of the service; ``/files/upload`` is appended.
        LLM_API: Token sent as ``Authorization: Bearer <token>``.
        file_path: Path of the local file to upload.
        user_id: Value sent in the ``user`` form field.

    Returns:
        The JSON-decoded response body for any non-404 response, or a string
        starting with ``"Error: "`` when the path is missing, the file does
        not exist, the endpoint returns 404, or the body is not valid JSON.
    """
    # A missing path (None or "") would make os.path.exists raise TypeError
    # for None, so it is reported explicitly instead.
    if not file_path:
        return "Error: No file provided"
    if not os.path.exists(file_path):
        return f"Error: File {file_path} not found"

    mime_type, _ = mimetypes.guess_type(file_path)
    with open(file_path, 'rb') as f:
        form_data = aiohttp.FormData()
        # Send only the base name: the server has no use for the local
        # directory layout and it should not be disclosed.
        form_data.add_field(
            'file',
            f,
            filename=os.path.basename(file_path),
            content_type=mime_type,
        )
        form_data.add_field('user', user_id)

        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{LLM_URL}/files/upload",
                headers={"Authorization": f"Bearer {LLM_API}"},
                data=form_data,
            ) as response:
                print("Upload response status code:", response.status)  # Debug information
                if response.status == 404:
                    return "Error: Endpoint not found (404)"
                response_text = await response.text()
                print("Raw upload response text:", response_text)  # Debug information
                try:
                    return json.loads(response_text)
                except json.JSONDecodeError:
                    return "Error: Invalid JSON response"
async def handle_input(file_path, category):
    """Upload the selected image, then ask the chat endpoint about it.

    Args:
        file_path: Path of the image to upload; may be empty when nothing
            was selected.
        category: Category text forwarded as the chat query.

    Returns:
        The chat response text, or a value describing the failure: an
        ``"Error: ..."`` string, or the upload response itself when it is a
        mapping containing an ``"error"`` key.
    """
    # Fail early with a readable message instead of crashing further down.
    if not file_path:
        return "Error: No image uploaded"
    if not category:
        return "Error: No category selected"

    upload_response = await upload_file(LLM_URL, LLM_API, file_path, USER_ID)
    print("Upload response:", upload_response)  # Debug information

    # upload_file reports its own failures as plain "Error: ..." strings; a
    # lowercase "error" membership test never matches those, and calling
    # .get() on a string would raise AttributeError.
    if not isinstance(upload_response, dict):
        return str(upload_response)
    if "error" in upload_response:
        return upload_response

    file_id = upload_response.get("id")  # Extract file ID from the response
    if not file_id:
        return "Error: No file ID returned from upload"

    chat_response = await send_chat_message(LLM_URL, LLM_API, category, file_id)
    print("Chat response:", chat_response)  # Debug information
    return chat_response
# Gradio input components: an image upload that hands the callback a file
# path, and a radio selector for the document category.
file_input = gr.Image(type="filepath", label="圖片上傳")
category = gr.Radio(
    choices=[
        "高鐵車票",
        "超商高鐵車票",
        "台鐵車票",
        "超商台鐵車票",
        "通行明細 (etag)",
        "QRCODE發票",
        "計程車乘車證明",
    ],
    label="Message Category",
)

# Each example pairs a demo image path with the category to select for it.
examples = [
    ["DEMO/qrcode.jpg", "QRCODE發票"],
    ["DEMO/mthsr.JPG", "超商高鐵車票"],
    ["DEMO/thsr.jpg", "高鐵車票"],
    ["DEMO/mtra.jpg", "超商台鐵車票"],
    ["DEMO/tra.JPG", "台鐵車票"],
    ["DEMO/taxi.jpg", "計程車乘車證明"],
    ["DEMO/etag.jpg", "通行明細 (etag)"],
]
# Centered <h1> page heading, kept as a raw HTML fragment.
TITLE = """<h1 align="center">Large Multimodal Model (LMM) Playground 💬 輸入各種單據並選擇種類,解析得到各種關鍵資訊 </h1>"""
# Centered <h2> author/date line linking to the author's site.
SUBTITLE = """<h2 align="center"><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/06 </a><br></h2>"""
# Two rows of related blog links separated by " | "; the literal spans two lines.
LINKS = """<a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a> | <a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a> | <a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型,它是什麼?想要嗎?</a><br>
<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>"""
# Page layout: the three HTML header fragments in order, followed by the
# upload form wired to handle_input.
with gr.Blocks() as iface:
    for html_fragment in (TITLE, SUBTITLE, LINKS):
        gr.HTML(html_fragment)

    gr.Interface(
        fn=handle_input,
        inputs=[file_input, category],
        outputs="text",
        allow_flagging="never",
        examples=examples,
    )

# Start serving the assembled page.
iface.launch()