Im-ai123 committed on
Commit
98ec970
·
verified ·
1 Parent(s): 40e72bf

update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -13
app.py CHANGED
@@ -1,23 +1,41 @@
1
  import os
2
- # 强制安装依赖
3
- os.system("pip install gradio==4.0.0 transformers==4.30.2 torch==1.13.1 pillow==9.4.0")
4
-
5
  import gradio as gr
6
- from transformers import pipeline
7
  from PIL import Image
 
8
 
9
- # 加载模型
10
- image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
 
 
 
 
 
 
 
 
11
 
12
- def generate_caption(image):
13
- result = image_to_text(image)[0]
14
- return f"图像描述:{result['generated_text']}"
 
 
 
 
 
 
 
 
 
 
15
 
16
- with gr.Blocks(title="图像文本描述工具") as demo:
17
- gr.Markdown("# 图像文本描述工具")
 
18
  image_input = gr.Image(type="pil", label="上传图片")
19
- text_output = gr.Textbox(label="生成描述")
20
- gr.Button("生成").click(fn=generate_caption, inputs=image_input, outputs=text_output)
21
 
22
  if __name__ == "__main__":
23
  demo.launch()
 
1
  import os
2
+ os.system("pip install gradio==4.0.0 transformers==4.36.2 torch==2.0.1 pillow==9.4.0 accelerate==0.30.0 bitsandbytes==0.43.0")
 
 
3
  import gradio as gr
4
+ import torch
5
  from PIL import Image
6
+ from transformers import AutoProcessor, AutoModelForVisionAndLanguageGeneration
7
 
8
+ # 加载DAM-3B模型和处理器
9
+ model_name = "nvidia/DAM-3B"
10
+ processor = AutoProcessor.from_pretrained(model_name)
11
+ # 4-bit量化适配免费空间,降低内存占用
12
+ model = AutoModelForVisionAndLanguageGeneration.from_pretrained(
13
+ model_name,
14
+ torch_dtype=torch.float16,
15
+ load_in_4bit=True,
16
+ device_map="auto"
17
+ )
18
 
19
+ def generate_detailed_caption(image):
20
+ # 模拟全图描述(DAM-3B支持区域指定,此处简化为全图细节描述)
21
+ inputs = processor(images=image, return_tensors="pt").to(model.device, torch.float16)
22
+ # 生成详细描述,设置长文本参数
23
+ outputs = model.generate(
24
+ **inputs,
25
+ max_length=200, # 延长描述长度,保留更多细节
26
+ num_beams=4, # 束搜索提升描述连贯性
27
+ no_repeat_ngram_size=3, # 避免重复内容
28
+ early_stopping=True
29
+ )
30
+ caption = processor.decode(outputs[0], skip_special_tokens=True)
31
+ return f"图像细节描述:{caption}"
32
 
33
+ # 构建Gradio界面
34
+ with gr.Blocks(title="图像细节描述工具") as demo:
35
+ gr.Markdown("# 图像细节描述工具(DAM-3B优化版)")
36
  image_input = gr.Image(type="pil", label="上传图片")
37
+ text_output = gr.Textbox(label="生成细节描述", lines=5)
38
+ gr.Button("生成详细描述").click(fn=generate_detailed_caption, inputs=image_input, outputs=text_output)
39
 
40
  if __name__ == "__main__":
41
  demo.launch()