DeepLearning101 committed on
Commit
60549f8
1 Parent(s): 2080266

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import mimetypes
4
+ import json, os
5
+
6
# Credentials and base URL of the LLM service, read from the environment.
# Both are None when the corresponding variable is not set.
LLM_API = os.environ.get("LLM_API")
LLM_URL = os.environ.get("LLM_URL")
if LLM_URL:
    # Endpoints are built as f"{LLM_URL}/...", so a trailing slash in the
    # configured value would otherwise yield a double slash in the URL.
    LLM_URL = LLM_URL.rstrip("/")

USER_ID = "HuggingFace Space"  # Placeholder user ID
10
+
11
# (connect, read) timeouts in seconds, so a stalled server cannot block the
# caller indefinitely. The read timeout applies between received chunks.
_CHAT_TIMEOUT = (10, 300)


def _parse_sse_data(raw_line):
    """Return the JSON object carried by one ``data:`` stream line, or None."""
    if not raw_line:
        return None
    if isinstance(raw_line, bytes):
        # Decode explicitly as UTF-8 instead of trusting the charset guessed
        # from the response headers, which can garble non-ASCII text.
        raw_line = raw_line.decode("utf-8", errors="replace")
    if not raw_line.startswith("data:"):
        return None
    try:
        # Strip only the leading field name; the payload itself may legally
        # contain the text "data: " and must not be split on it.
        event = json.loads(raw_line[len("data:"):].strip())
    except json.JSONDecodeError:
        return None
    return event if isinstance(event, dict) else None


def send_chat_message(LLM_URL, LLM_API, category, file_id):
    """Send a chat request about an uploaded image and return the last agent thought.

    Posts a streaming request to ``{LLM_URL}/chat-messages`` with ``category``
    as the query and ``file_id`` as an attached image, then scans the event
    stream for ``agent_thought`` events.

    Args:
        LLM_URL: Base URL of the service (no trailing slash expected).
        LLM_API: API key sent as a Bearer token.
        category: Text used as the chat query.
        file_id: Identifier of a previously uploaded file.

    Returns:
        The stripped ``thought`` text of the last ``agent_thought`` event, or
        a string starting with ``"Error:"`` describing what went wrong.
    """
    payload = {
        "inputs": {},
        "query": category,
        "response_mode": "streaming",
        "conversation_id": "",
        "user": USER_ID,
        "files": [
            {
                "type": "image",
                "transfer_method": "local_file",
                "upload_file_id": file_id,
            }
        ],
    }
    print("Sending chat message payload:", payload)  # Debug information

    endpoint = f"{LLM_URL}/chat-messages"
    print("Request URL:", endpoint)
    try:
        response = requests.post(
            endpoint,
            headers={"Authorization": f"Bearer {LLM_API}"},
            json=payload,
            stream=True,  # Enable streaming
            timeout=_CHAT_TIMEOUT,
        )
    except requests.exceptions.RequestException as exc:
        return f"Error: Request failed ({exc})"

    last_thought = None
    # The context manager releases the streamed connection on every exit path.
    with response:
        print("Response status code:", response.status_code)
        if response.status_code == 404:
            return "Error: Endpoint not found (404)"
        if not response.ok:
            return f"Error: Unexpected status code {response.status_code}"

        # Handle the stream of events
        try:
            for raw_line in response.iter_lines():
                event = _parse_sse_data(raw_line)
                if event is None or event.get("event") != "agent_thought":
                    continue
                thought = event.get("thought")
                # The most recent agent_thought event wins; non-string
                # values are treated as "no thought".
                last_thought = thought if isinstance(thought, str) else None
        except requests.exceptions.RequestException as exc:
            return f"Error: Stream interrupted ({exc})"

    if last_thought:
        return last_thought.strip()
    return "Error: No thought found in the response"
57
+
58
def upload_file(LLM_URL, LLM_API, file_path, user_id):
    """Upload a local file to ``{LLM_URL}/files/upload``.

    Args:
        LLM_URL: Base URL of the service (no trailing slash expected).
        LLM_API: API key sent as a Bearer token.
        file_path: Path of the file to upload; may be None or empty.
        user_id: Value sent in the ``user`` form field.

    Returns:
        The decoded JSON body of the response on success, or a string
        starting with ``"Error:"`` when the file is missing, the request
        fails, the endpoint returns 404, or the body is not valid JSON.
    """
    # isfile() also rejects directories, which would otherwise fail in open().
    if not file_path or not os.path.isfile(file_path):
        return f"Error: File {file_path} not found"

    mime_type, _ = mimetypes.guess_type(file_path)
    try:
        with open(file_path, "rb") as f:
            response = requests.post(
                f"{LLM_URL}/files/upload",
                headers={"Authorization": f"Bearer {LLM_API}"},
                # Send only the base name so the local directory layout is
                # not exposed to the remote service.
                files={"file": (os.path.basename(file_path), f, mime_type)},
                data={"user": user_id},
                timeout=(10, 120),  # (connect, read) seconds
            )
    except (OSError, requests.exceptions.RequestException) as exc:
        return f"Error: Upload failed ({exc})"

    print("Upload response status code:", response.status_code)  # Debug information
    if response.status_code == 404:
        return "Error: Endpoint not found (404)"
    print("Raw upload response text:", response.text)  # Debug information
    try:
        return response.json()
    except ValueError:
        # ValueError is the common base of the JSON decode errors raised by
        # response.json() across requests versions.
        return "Error: Invalid JSON response"
77
+
78
def handle_input(file_path, category):
    """Upload the selected image and ask the LLM service to analyse it.

    Args:
        file_path: Local path of the image to upload; None or empty when no
            image was provided.
        category: Text sent as the chat query; None or empty when nothing
            was selected.

    Returns:
        The text returned by ``send_chat_message``, or an error description
        string when input is missing or the upload did not yield a file ID.
    """
    if not file_path:
        return "Error: No image provided"
    if not category:
        return "Error: No category selected"

    upload_response = upload_file(LLM_URL, LLM_API, file_path, USER_ID)
    print("Upload response:", upload_response)  # Debug information

    # upload_file reports failures as plain strings ("Error: ..."); anything
    # that is not a JSON object cannot carry a file ID, so surface it as-is
    # instead of calling .get() on it.
    if not isinstance(upload_response, dict) or "error" in upload_response:
        return str(upload_response)

    file_id = upload_response.get("id")  # Extract file ID from the response
    if not file_id:
        return "Error: No file ID returned from upload"

    chat_response = send_chat_message(LLM_URL, LLM_API, category, file_id)
    print("Chat response:", chat_response)  # Debug information
    return chat_response
90
+
91
# Define Gradio interface
file_input = gr.Image(label='圖片上傳', type='filepath')
category = gr.Radio(
    label="Message Category",
    choices=["高鐵車票", "超商高鐵車票", "台鐵車票", "超商台鐵車票", "通行明細 (etag)", "QRCODE發票", "計程車乘車證明"],
)

# Each example pairs a demo image path with the matching category choice.
examples = [
    ["DEMO/qrcode.jpg", 'QRCODE發票'],
    ['DEMO/mthsr.JPG', '超商高鐵車票'],
    ['DEMO/thsr.jpg', '高鐵車票'],
    ['DEMO/mtra.jpg', '超商台鐵車票'],
    ['DEMO/tra.JPG', '台鐵車票'],
    ['DEMO/taxi.jpg', '計程車乘車證明'],
    ['DEMO/etag.jpg', '通行明細 (etag)'],
]

# Static HTML shown above the interface.
TITLE = """<h1 align="center">Large Multimodal Model (LMM) Playground 💬 輸入各種單據並選擇種類,解析得到各種關鍵資訊 </h1>"""
SUBTITLE = """<h2 align="center"><a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2024/06 </a><br></h2>"""
LINKS = """<a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a> | <a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a> | <a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型,它是什麼?想要嗎?</a><br>
<a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>"""

with gr.Blocks() as iface:
    gr.HTML(TITLE)
    gr.HTML(SUBTITLE)
    gr.HTML(LINKS)
    gr.Interface(
        fn=handle_input,
        inputs=[file_input, category],
        outputs="text",
        examples=examples,
        allow_flagging="never",
    )

# Only start the server when run as a script, so importing this module
# (e.g. to reuse handle_input) does not launch the app as a side effect.
if __name__ == "__main__":
    iface.launch()