Spaces:
Runtime error
Runtime error
update app.py
Browse files
app.py
CHANGED
|
@@ -1,23 +1,41 @@
|
|
| 1 |
import os
|
| 2 |
-
|
| 3 |
-
os.system("pip install gradio==4.0.0 transformers==4.30.2 torch==1.13.1 pillow==9.4.0")
|
| 4 |
-
|
| 5 |
import gradio as gr
|
| 6 |
-
|
| 7 |
from PIL import Image
|
|
|
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
def
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
|
|
|
| 18 |
image_input = gr.Image(type="pil", label="上传图片")
|
| 19 |
-
text_output = gr.Textbox(label="
|
| 20 |
-
gr.Button("
|
| 21 |
|
| 22 |
if __name__ == "__main__":
|
| 23 |
demo.launch()
|
|
|
|
| 1 |
import os
|
| 2 |
+
os.system("pip install gradio==4.0.0 transformers==4.36.2 torch==2.0.1 pillow==9.4.0 accelerate==0.30.0 bitsandbytes==0.43.0")
|
|
|
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
+
import torch
|
| 5 |
from PIL import Image
|
| 6 |
+
from transformers import AutoProcessor, AutoModelForVisionAndLanguageGeneration
|
| 7 |
|
| 8 |
+
# Load the DAM-3B processor and model once at import time.
model_name = "nvidia/DAM-3B"
processor = AutoProcessor.from_pretrained(model_name)

# 4-bit quantization keeps the memory footprint small enough for the free
# Space tier; device_map="auto" leaves weight placement to the library.
# NOTE(review): the Space status shown with this commit is "Runtime error" --
# confirm that the pinned transformers release really exports
# AutoModelForVisionAndLanguageGeneration and that this checkpoint can be
# loaded through it.
_quantized_load_kwargs = {
    "torch_dtype": torch.float16,
    "load_in_4bit": True,
    "device_map": "auto",
}
model = AutoModelForVisionAndLanguageGeneration.from_pretrained(
    model_name,
    **_quantized_load_kwargs,
)
|
| 18 |
|
| 19 |
+
def generate_detailed_caption(image):
    """Generate a detailed caption for a whole image with the loaded model.

    Args:
        image: The PIL image delivered by the Gradio image input, or ``None``
            when the button is clicked before anything has been uploaded.

    Returns:
        The decoded caption prefixed with a fixed label, or a short prompt
        asking for an upload when ``image`` is ``None``.
    """
    # Without an upload there is nothing to describe; return a readable hint
    # instead of letting the processor fail on a None input.
    if image is None:
        return "请先上传图片。"

    # Whole-image description only. Per the original author's note, DAM-3B
    # also supports region prompts; this simplified path does not use them.
    inputs = processor(images=image, return_tensors="pt").to(model.device, torch.float16)

    # Generation settings chosen for long, detailed text.
    outputs = model.generate(
        **inputs,
        max_length=200,  # longer cap so more detail is kept
        num_beams=4,  # beam search for more coherent descriptions
        no_repeat_ngram_size=3,  # avoid repeated phrases
        early_stopping=True,
    )
    caption = processor.decode(outputs[0], skip_special_tokens=True)
    return f"图像细节描述:{caption}"
|
| 32 |
|
| 33 |
+
# Build the Gradio interface: one image input, one text output, and a button
# that routes the uploaded image through generate_detailed_caption.
with gr.Blocks(title="图像细节描述工具") as demo:
    gr.Markdown("# 图像细节描述工具(DAM-3B优化版)")
    image_input = gr.Image(type="pil", label="上传图片")
    text_output = gr.Textbox(label="生成细节描述", lines=5)
    describe_button = gr.Button("生成详细描述")
    describe_button.click(
        fn=generate_detailed_caption,
        inputs=image_input,
        outputs=text_output,
    )

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|