Spaces:
Sleeping
Sleeping
fix: remove generator/queue, add api_name, add startup logs to diagnose No API Found
Browse files
app.py
CHANGED
|
@@ -8,51 +8,55 @@ import traceback
|
|
| 8 |
|
| 9 |
import gradio as gr
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
def run_vqa_extraction(
|
| 15 |
pdf_files,
|
| 16 |
-
task_name
|
| 17 |
-
api_url
|
| 18 |
-
llm_api_key
|
| 19 |
-
mineru_api_key
|
| 20 |
-
model_name
|
| 21 |
-
max_workers
|
| 22 |
):
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
yield None, "โณ [1/5] ่ฎพ็ฝฎ็ฏๅขๅ้โฆ"
|
| 39 |
-
|
| 40 |
-
os.environ["DF_API_KEY"] = llm_api_key.strip()
|
| 41 |
-
os.environ["MINERU_API_KEY"] = mineru_api_key.strip()
|
| 42 |
-
|
| 43 |
-
workspace = tempfile.mkdtemp(prefix="dataflow_vqa_")
|
| 44 |
-
cache_dir = os.path.join(workspace, "cache")
|
| 45 |
os.makedirs(cache_dir, exist_ok=True)
|
| 46 |
original_cwd = os.getcwd()
|
| 47 |
|
| 48 |
try:
|
| 49 |
os.chdir(workspace)
|
| 50 |
|
| 51 |
-
# โโ
|
| 52 |
-
|
|
|
|
|
|
|
| 53 |
pdf_paths = []
|
| 54 |
for i, f in enumerate(pdf_files):
|
| 55 |
-
src = f if isinstance(f, str) else (
|
|
|
|
| 56 |
dst = os.path.join(workspace, f"input_{i}.pdf")
|
| 57 |
shutil.copy(src, dst)
|
| 58 |
pdf_paths.append(dst)
|
|
@@ -61,58 +65,62 @@ def run_vqa_extraction(
|
|
| 61 |
with open(input_jsonl, "w") as fout:
|
| 62 |
entry = {
|
| 63 |
"input_pdf_paths": pdf_paths if len(pdf_paths) > 1 else pdf_paths[0],
|
| 64 |
-
"name": task_name
|
| 65 |
}
|
| 66 |
fout.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
| 67 |
|
| 68 |
-
# โโ
|
| 69 |
-
yield None, "โณ [3/5] ๅ ่ฝฝ Pipeline ๆจกๅ๏ผ้ฆๆฌกๅฏ่ฝ้่ฆ 10โ30 ็ง๏ผโฆ"
|
| 70 |
try:
|
| 71 |
-
from pipelines.vqa_extract_optimized_pipeline import
|
|
|
|
|
|
|
| 72 |
except Exception:
|
| 73 |
-
|
| 74 |
-
yield err(f"ๅฏผๅ
ฅ Pipeline ๅคฑ่ดฅ๏ผ่ฏทๆฃๆฅไพ่ตๅฎ่ฃ
๏ผ\n{tb}"); return
|
| 75 |
|
|
|
|
| 76 |
try:
|
| 77 |
pipeline = PDF_VQA_extract_optimized_pipeline(
|
| 78 |
-
input_file
|
| 79 |
-
api_url
|
| 80 |
-
model_name
|
| 81 |
-
max_workers
|
| 82 |
)
|
| 83 |
pipeline.compile()
|
| 84 |
except ValueError as e:
|
| 85 |
msg = str(e)
|
| 86 |
if "DF_API_KEY" in msg:
|
| 87 |
-
|
| 88 |
if "MINERU_API_KEY" in msg:
|
| 89 |
-
|
| 90 |
-
|
| 91 |
|
| 92 |
-
# โโ
|
| 93 |
-
yield None, "โณ [4/5] ่ฐ็จ MinerU ่งฃๆ PDF + LLM ๆๅ QA๏ผๅฏ่ฝ้่ฆๆฐๅ้๏ผ่ฏท่ๅฟ็ญๅพ
๏ผโฆ"
|
| 94 |
try:
|
| 95 |
pipeline.forward()
|
| 96 |
except RuntimeError as e:
|
| 97 |
msg = str(e)
|
| 98 |
if "no api found" in msg.lower() or "Apply upload urls failed" in msg:
|
| 99 |
-
|
| 100 |
-
"MinerU API Key ๆ ๆๆๅทฒ่ฟๆใ\n"
|
| 101 |
-
"่ฏทๅฐ https://mineru.net/apiManage/token ้ๆฐ็ณ่ฏท
|
| 102 |
f"ๅๅง้่ฏฏ๏ผ{msg}"
|
| 103 |
-
)
|
| 104 |
if "Cannot connect to LLM server" in msg:
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
# โโ
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
|
|
|
| 112 |
if not step_files:
|
| 113 |
-
|
| 114 |
|
| 115 |
-
max_step = max(
|
|
|
|
|
|
|
|
|
|
| 116 |
max_step_file = os.path.join(cache_dir, f"vqa_step{max_step}.jsonl")
|
| 117 |
|
| 118 |
result_file = os.path.join(workspace, "raw_vqa.jsonl")
|
|
@@ -129,108 +137,89 @@ def run_vqa_extraction(
|
|
| 129 |
f_out.write(json.dumps(out, ensure_ascii=False) + "\n")
|
| 130 |
count += 1
|
| 131 |
|
| 132 |
-
|
| 133 |
|
| 134 |
except Exception:
|
| 135 |
-
|
| 136 |
-
yield err(f"ๆช็ฅ้่ฏฏ๏ผ\n{tb}")
|
| 137 |
finally:
|
| 138 |
os.chdir(original_cwd)
|
| 139 |
|
| 140 |
|
| 141 |
-
# โโ
|
| 142 |
|
| 143 |
with gr.Blocks(title="DataFlow-VQA ยท PDF ๆๅ Demo", theme=gr.themes.Soft()) as demo:
|
| 144 |
gr.Markdown(
|
| 145 |
"""
|
| 146 |
# ๐ฌ DataFlow-VQA โ ไป PDF ๆๅ VQA ๆฐๆฎ
|
| 147 |
|
| 148 |
-
ไธไผ ๆๆๆ่ฏๅท PDF๏ผ
|
| 149 |
|
| 150 |
-
**ๆต็จ๏ผ** PDF
|
| 151 |
|
| 152 |
> ๆๆ API ่ฐ็จๅ้่ฟๆจๆไพ็ๅฏ้ฅๅฎๆ๏ผๆฌ Space ไธๅญๅจไปปไฝๆฐๆฎๆๅฏ้ฅใ
|
| 153 |
"""
|
| 154 |
)
|
| 155 |
|
| 156 |
with gr.Row():
|
| 157 |
-
with gr.Column(
|
| 158 |
gr.Markdown("### ๐ ไธไผ PDF")
|
| 159 |
pdf_files = gr.File(
|
| 160 |
-
label="ไธไผ PDF๏ผ
|
| 161 |
file_types=[".pdf"],
|
| 162 |
file_count="multiple",
|
| 163 |
)
|
| 164 |
-
task_name = gr.Textbox(
|
| 165 |
-
label="ไปปๅกๅ็งฐ๏ผ็จไบ็ฎๅฝๅฝๅ๏ผ",
|
| 166 |
-
value="task1",
|
| 167 |
-
placeholder="task1",
|
| 168 |
-
)
|
| 169 |
|
| 170 |
-
gr.Markdown("### โ๏ธ LLM
|
| 171 |
api_url = gr.Textbox(
|
| 172 |
label="API Base URL",
|
| 173 |
value="https://generativelanguage.googleapis.com/v1beta/openai/",
|
| 174 |
-
placeholder="https://api.openai.com/v1",
|
| 175 |
)
|
| 176 |
llm_api_key = gr.Textbox(
|
| 177 |
label="LLM API Key๏ผDF_API_KEY๏ผ",
|
| 178 |
-
placeholder="sk-... / AIzaSy...",
|
| 179 |
type="password",
|
|
|
|
| 180 |
)
|
| 181 |
model_name = gr.Textbox(
|
| 182 |
-
label="ๆจกๅๅ็งฐ
|
| 183 |
value="gemini-2.5-pro",
|
| 184 |
-
placeholder="gemini-2.5-pro / gpt-4o / deepseek-r1",
|
| 185 |
)
|
| 186 |
|
| 187 |
-
gr.Markdown("### ๐๏ธ MinerU
|
| 188 |
mineru_api_key = gr.Textbox(
|
| 189 |
label="MinerU API Key๏ผMINERU_API_KEY๏ผ",
|
| 190 |
-
placeholder="sk2-...",
|
| 191 |
type="password",
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
max_workers = gr.Slider(
|
| 195 |
-
label="ๅนถๅ Worker ๆฐ",
|
| 196 |
-
minimum=1, maximum=30, value=5, step=1,
|
| 197 |
)
|
| 198 |
-
|
|
|
|
| 199 |
|
| 200 |
-
with gr.Column(
|
| 201 |
gr.Markdown("### ๐ค ่พๅบ")
|
| 202 |
status_box = gr.Textbox(
|
| 203 |
label="่ฟ่ก็ถๆ",
|
| 204 |
interactive=False,
|
| 205 |
lines=10,
|
| 206 |
-
placeholder="็นๅปใๅผๅงๆๅใๅ
|
| 207 |
-
)
|
| 208 |
-
output_file = gr.File(
|
| 209 |
-
label="ไธ่ฝฝๆๅ็ปๆ๏ผraw_vqa.jsonl๏ผ",
|
| 210 |
-
interactive=False,
|
| 211 |
)
|
|
|
|
| 212 |
|
| 213 |
-
gr.Markdown(
|
| 214 |
-
"""
|
| 215 |
---
|
| 216 |
-
**ไธคไธช API Key ็ๅบๅซ๏ผ**
|
| 217 |
-
|
| 218 |
| Key | ็จ้ | ็ณ่ฏทๅฐๅ |
|
| 219 |
|-----|------|---------|
|
| 220 |
-
| LLM API Key | ่ฐ็จ GPT
|
| 221 |
-
| **MinerU API Key** |
|
| 222 |
-
|
| 223 |
-
**้กน็ฎๅฐๅ**๏ผ[OpenDCAI/DataFlow-VQA](https://github.com/OpenDCAI/DataFlow-VQA)
|
| 224 |
-
"""
|
| 225 |
-
)
|
| 226 |
|
| 227 |
run_btn.click(
|
| 228 |
fn=run_vqa_extraction,
|
| 229 |
inputs=[pdf_files, task_name, api_url, llm_api_key, mineru_api_key, model_name, max_workers],
|
| 230 |
outputs=[output_file, status_box],
|
|
|
|
|
|
|
| 231 |
)
|
| 232 |
|
| 233 |
-
demo.queue()
|
| 234 |
-
|
| 235 |
if __name__ == "__main__":
|
| 236 |
demo.launch()
|
|
|
|
| 8 |
|
| 9 |
import gradio as gr
|
| 10 |
|
| 11 |
+
# Ensure repo root is on the Python path so local packages resolve correctly
|
| 12 |
+
_REPO_ROOT = os.path.dirname(os.path.abspath(__file__))
|
| 13 |
+
sys.path.insert(0, _REPO_ROOT)
|
| 14 |
|
| 15 |
+
print(f"[startup] repo root: {_REPO_ROOT}", flush=True)
|
| 16 |
+
print(f"[startup] Python: {sys.version}", flush=True)
|
| 17 |
+
print(f"[startup] Gradio: {gr.__version__}", flush=True)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# โโ Backend function โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 21 |
|
| 22 |
def run_vqa_extraction(
|
| 23 |
pdf_files,
|
| 24 |
+
task_name,
|
| 25 |
+
api_url,
|
| 26 |
+
llm_api_key,
|
| 27 |
+
mineru_api_key,
|
| 28 |
+
model_name,
|
| 29 |
+
max_workers,
|
| 30 |
):
|
| 31 |
+
# โโ validation โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 32 |
+
if pdf_files is None or (isinstance(pdf_files, list) and len(pdf_files) == 0):
|
| 33 |
+
return None, "โ ่ฏท่ณๅฐไธไผ ไธไธช PDF ๆไปถใ"
|
| 34 |
+
if not str(llm_api_key).strip():
|
| 35 |
+
return None, "โ ่ฏทๅกซๅ LLM API Key๏ผ็จไบ่ฐ็จๅคงๆจกๅๆๅ QA๏ผใ"
|
| 36 |
+
if not str(mineru_api_key).strip():
|
| 37 |
+
return None, "โ ่ฏทๅกซๅ MinerU API Key๏ผไธ LLM Key ๅฎๅ
จไธๅ๏ผๅป https://mineru.net/apiManage/token ็ณ่ฏท๏ผใ"
|
| 38 |
+
task_name = str(task_name).strip() or "task1"
|
| 39 |
+
|
| 40 |
+
# โโ env vars โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 41 |
+
os.environ["DF_API_KEY"] = str(llm_api_key).strip()
|
| 42 |
+
os.environ["MINERU_API_KEY"] = str(mineru_api_key).strip()
|
| 43 |
+
|
| 44 |
+
workspace = tempfile.mkdtemp(prefix="dataflow_vqa_")
|
| 45 |
+
cache_dir = os.path.join(workspace, "cache")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
os.makedirs(cache_dir, exist_ok=True)
|
| 47 |
original_cwd = os.getcwd()
|
| 48 |
|
| 49 |
try:
|
| 50 |
os.chdir(workspace)
|
| 51 |
|
| 52 |
+
# โโ copy uploaded PDFs โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 53 |
+
if not isinstance(pdf_files, list):
|
| 54 |
+
pdf_files = [pdf_files]
|
| 55 |
+
|
| 56 |
pdf_paths = []
|
| 57 |
for i, f in enumerate(pdf_files):
|
| 58 |
+
src = f if isinstance(f, str) else (
|
| 59 |
+
f.name if hasattr(f, "name") else str(f))
|
| 60 |
dst = os.path.join(workspace, f"input_{i}.pdf")
|
| 61 |
shutil.copy(src, dst)
|
| 62 |
pdf_paths.append(dst)
|
|
|
|
| 65 |
with open(input_jsonl, "w") as fout:
|
| 66 |
entry = {
|
| 67 |
"input_pdf_paths": pdf_paths if len(pdf_paths) > 1 else pdf_paths[0],
|
| 68 |
+
"name": task_name,
|
| 69 |
}
|
| 70 |
fout.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
| 71 |
|
| 72 |
+
# โโ import pipeline (lazy so startup stays fast) โโโโโโโโโโโโโโโโโโ
|
|
|
|
| 73 |
try:
|
| 74 |
+
from pipelines.vqa_extract_optimized_pipeline import (
|
| 75 |
+
PDF_VQA_extract_optimized_pipeline,
|
| 76 |
+
)
|
| 77 |
except Exception:
|
| 78 |
+
return None, f"โ ๅฏผๅ
ฅ Pipeline ๅคฑ่ดฅ๏ผไพ่ตๆชๅฎ่ฃ
๏ผ๏ผ๏ผ\n{traceback.format_exc()}"
|
|
|
|
| 79 |
|
| 80 |
+
# โโ build pipeline โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 81 |
try:
|
| 82 |
pipeline = PDF_VQA_extract_optimized_pipeline(
|
| 83 |
+
input_file = input_jsonl,
|
| 84 |
+
api_url = str(api_url).rstrip("/"),
|
| 85 |
+
model_name = str(model_name),
|
| 86 |
+
max_workers = int(max_workers),
|
| 87 |
)
|
| 88 |
pipeline.compile()
|
| 89 |
except ValueError as e:
|
| 90 |
msg = str(e)
|
| 91 |
if "DF_API_KEY" in msg:
|
| 92 |
+
return None, "โ LLM API Key ๆช่ฝ่ฏปๅ๏ผ่ฏท็กฎ่ฎคๅกซๅๅ้่ฏใ"
|
| 93 |
if "MINERU_API_KEY" in msg:
|
| 94 |
+
return None, "โ MinerU API Key ๆช่ฝ่ฏปๅ๏ผ่ฏท็กฎ่ฎคๅกซๅๅ้่ฏใ"
|
| 95 |
+
return None, f"โ Pipeline ๅๅงๅๅคฑ่ดฅ๏ผ{msg}"
|
| 96 |
|
| 97 |
+
# โโ run (this blocks until all steps complete) โโโโโโโโโโโโโโโโโโโโ
|
|
|
|
| 98 |
try:
|
| 99 |
pipeline.forward()
|
| 100 |
except RuntimeError as e:
|
| 101 |
msg = str(e)
|
| 102 |
if "no api found" in msg.lower() or "Apply upload urls failed" in msg:
|
| 103 |
+
return None, (
|
| 104 |
+
"โ MinerU API Key ๆ ๆๆๅทฒ่ฟๆใ\n"
|
| 105 |
+
"่ฏทๅฐ https://mineru.net/apiManage/token ้ๆฐ็ณ่ฏทใ\n\n"
|
| 106 |
f"ๅๅง้่ฏฏ๏ผ{msg}"
|
| 107 |
+
)
|
| 108 |
if "Cannot connect to LLM server" in msg:
|
| 109 |
+
return None, f"โ ๆ ๆณ่ฟๆฅ LLM API๏ผ่ฏทๆฃๆฅ Base URLใ\n\nๅๅง้่ฏฏ๏ผ{msg}"
|
| 110 |
+
return None, f"โ Pipeline ่ฟ่กๅบ้๏ผ{msg}"
|
| 111 |
+
|
| 112 |
+
# โโ collect output โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 113 |
+
step_files = [
|
| 114 |
+
f for f in os.listdir(cache_dir)
|
| 115 |
+
if re.match(r"vqa_step\d+\.jsonl", f)
|
| 116 |
+
]
|
| 117 |
if not step_files:
|
| 118 |
+
return None, "โ Pipeline ๅฎๆไฝๆชๆพๅฐ่พๅบๆไปถ๏ผ่ฏทๆฃๆฅ HF Space ็ Runtime ๆฅๅฟใ"
|
| 119 |
|
| 120 |
+
max_step = max(
|
| 121 |
+
int(re.findall(r"vqa_step(\d+)\.jsonl", f)[0])
|
| 122 |
+
for f in step_files
|
| 123 |
+
)
|
| 124 |
max_step_file = os.path.join(cache_dir, f"vqa_step{max_step}.jsonl")
|
| 125 |
|
| 126 |
result_file = os.path.join(workspace, "raw_vqa.jsonl")
|
|
|
|
| 137 |
f_out.write(json.dumps(out, ensure_ascii=False) + "\n")
|
| 138 |
count += 1
|
| 139 |
|
| 140 |
+
return result_file, f"โ
ๅฎๆ๏ผๅ
ฑๆๅ {count} ๆก QA ๅฏน๏ผ็นๅปไธๆนไธ่ฝฝ raw_vqa.jsonlใ"
|
| 141 |
|
| 142 |
except Exception:
|
| 143 |
+
return None, f"โ ๆช็ฅ้่ฏฏ๏ผ\n{traceback.format_exc()}"
|
|
|
|
| 144 |
finally:
|
| 145 |
os.chdir(original_cwd)
|
| 146 |
|
| 147 |
|
| 148 |
+
# โโ UI โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 149 |
|
| 150 |
with gr.Blocks(title="DataFlow-VQA ยท PDF ๆๅ Demo", theme=gr.themes.Soft()) as demo:
|
| 151 |
gr.Markdown(
|
| 152 |
"""
|
| 153 |
# ๐ฌ DataFlow-VQA โ ไป PDF ๆๅ VQA ๆฐๆฎ
|
| 154 |
|
| 155 |
+
ไธไผ ๆๆๆ่ฏๅท PDF๏ผ็จ [MinerU](https://mineru.net) ่งฃๆ็้ขใๅ็จ LLM ๆๅ็ปๆๅ QA ๅฏน๏ผ่พๅบ `raw_vqa.jsonl`ใ
|
| 156 |
|
| 157 |
+
**ๆต็จ๏ผ** PDF ไธไผ โ MinerU ่งฃๆ โ LLM ๆๅ QA โ ไธ่ฝฝ็ปๆ
|
| 158 |
|
| 159 |
> ๆๆ API ่ฐ็จๅ้่ฟๆจๆไพ็ๅฏ้ฅๅฎๆ๏ผๆฌ Space ไธๅญๅจไปปไฝๆฐๆฎๆๅฏ้ฅใ
|
| 160 |
"""
|
| 161 |
)
|
| 162 |
|
| 163 |
with gr.Row():
|
| 164 |
+
with gr.Column():
|
| 165 |
gr.Markdown("### ๐ ไธไผ PDF")
|
| 166 |
pdf_files = gr.File(
|
| 167 |
+
label="ไธไผ PDF๏ผ1 ไธช๏ผ้ข็ญๆททๆ๏ผ2 ไธช๏ผ็ฌฌ1้ข็ฎ็ฌฌ2็ญๆก๏ผ",
|
| 168 |
file_types=[".pdf"],
|
| 169 |
file_count="multiple",
|
| 170 |
)
|
| 171 |
+
task_name = gr.Textbox(label="ไปปๅกๅ็งฐ", value="task1")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
+
gr.Markdown("### โ๏ธ LLM ้
็ฝฎ")
|
| 174 |
api_url = gr.Textbox(
|
| 175 |
label="API Base URL",
|
| 176 |
value="https://generativelanguage.googleapis.com/v1beta/openai/",
|
|
|
|
| 177 |
)
|
| 178 |
llm_api_key = gr.Textbox(
|
| 179 |
label="LLM API Key๏ผDF_API_KEY๏ผ",
|
|
|
|
| 180 |
type="password",
|
| 181 |
+
placeholder="sk-... / AIzaSy...",
|
| 182 |
)
|
| 183 |
model_name = gr.Textbox(
|
| 184 |
+
label="ๆจกๅๅ็งฐ",
|
| 185 |
value="gemini-2.5-pro",
|
|
|
|
| 186 |
)
|
| 187 |
|
| 188 |
+
gr.Markdown("### ๐๏ธ MinerU ้
็ฝฎ")
|
| 189 |
mineru_api_key = gr.Textbox(
|
| 190 |
label="MinerU API Key๏ผMINERU_API_KEY๏ผ",
|
|
|
|
| 191 |
type="password",
|
| 192 |
+
placeholder="sk2-...",
|
| 193 |
+
info="็ฌ็ซไบ LLM ็ Key๏ผๅป https://mineru.net/apiManage/token ๅ
่ดน็ณ่ฏท",
|
|
|
|
|
|
|
|
|
|
| 194 |
)
|
| 195 |
+
max_workers = gr.Slider(label="ๅนถๅๆฐ", minimum=1, maximum=30, value=5, step=1)
|
| 196 |
+
run_btn = gr.Button("โถ ๅผๅงๆๅ", variant="primary")
|
| 197 |
|
| 198 |
+
with gr.Column():
|
| 199 |
gr.Markdown("### ๐ค ่พๅบ")
|
| 200 |
status_box = gr.Textbox(
|
| 201 |
label="่ฟ่ก็ถๆ",
|
| 202 |
interactive=False,
|
| 203 |
lines=10,
|
| 204 |
+
placeholder="็นๅปใๅผๅงๆๅใๅ็ถๆๆพ็คบๅจ่ฟ้๏ผ่ฟ่ก้ๆฐๅ้๏ผ่ฏท่ๅฟ็ญๅพ
๏ผโฆ",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
)
|
| 206 |
+
output_file = gr.File(label="ไธ่ฝฝ็ปๆ๏ผraw_vqa.jsonl๏ผ", interactive=False)
|
| 207 |
|
| 208 |
+
gr.Markdown("""
|
|
|
|
| 209 |
---
|
|
|
|
|
|
|
| 210 |
| Key | ็จ้ | ็ณ่ฏทๅฐๅ |
|
| 211 |
|-----|------|---------|
|
| 212 |
+
| LLM API Key | ่ฐ็จ GPT/Gemini ็ญๅคงๆจกๅๆๅ QA | ๅฏนๅบ LLM ๆๅกๅ |
|
| 213 |
+
| **MinerU API Key** | ่งฃๆ PDF ็้ข๏ผไธ LLM ๅฎๅ
จ็ฌ็ซ๏ผ | [mineru.net/apiManage/token](https://mineru.net/apiManage/token) |
|
| 214 |
+
""")
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
run_btn.click(
|
| 217 |
fn=run_vqa_extraction,
|
| 218 |
inputs=[pdf_files, task_name, api_url, llm_api_key, mineru_api_key, model_name, max_workers],
|
| 219 |
outputs=[output_file, status_box],
|
| 220 |
+
api_name="run_vqa_extraction",
|
| 221 |
+
show_progress="full",
|
| 222 |
)
|
| 223 |
|
|
|
|
|
|
|
| 224 |
if __name__ == "__main__":
|
| 225 |
demo.launch()
|