torettomarui committed on
Commit
2584554
·
verified ·
1 Parent(s): b7f90d3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModel, AutoTokenizer
3
+ import torch
4
+ import torchvision.transforms as T
5
+ from torchvision.transforms.functional import InterpolationMode
6
+
7
+
8
+
9
# Per-channel mean / std handed to T.Normalize in build_transform.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)


# Identifier passed to from_pretrained for both the tokenizer and the model.
model_name = "model"

# NOTE(review): trust_remote_code=True lets custom code shipped with the
# checkpoint run at load time -- confirm the checkpoint source is trusted.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    use_fast=False,
)

# Load the weights in bfloat16, then cast, switch to eval mode and move the
# model to the CUDA device (a GPU is required for this script to start).
model = AutoModel.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model = model.to(torch.bfloat16).eval().cuda()
20
+
21
def build_transform(input_size):
    """Build the preprocessing pipeline applied to an input image.

    The returned transform converts non-RGB images to RGB, resizes to a
    square of ``input_size`` x ``input_size`` using bicubic interpolation,
    converts the image to a tensor and normalizes it with IMAGENET_MEAN /
    IMAGENET_STD.

    Args:
        input_size: Side length (in pixels) of the resized square image.

    Returns:
        A ``T.Compose`` object wrapping the four steps above.
    """
    def _ensure_rgb(img):
        # Leave RGB images untouched; convert every other mode.
        return img if img.mode == 'RGB' else img.convert('RGB')

    steps = [
        T.Lambda(_ensure_rgb),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose(steps)
30
+
31
+
32
def preprocess_image(file_path, image_size=448):
    """Turn one image into a single-item bfloat16 batch on the CUDA device.

    Args:
        file_path: Passed straight to the transform from build_transform,
            which reads ``.mode`` and may call ``.convert`` on it -- so
            despite the name this must be an image object, not a path string.
        image_size: Side length handed to build_transform.

    Returns:
        The transformed image stacked into a batch of one, cast to
        ``torch.bfloat16`` and moved to CUDA.
    """
    image_tensor = build_transform(image_size)(file_path)
    # Stack the lone tensor to add the leading batch dimension.
    batch = torch.stack((image_tensor,), dim=0)
    return batch.to(torch.bfloat16).cuda()
37
+
38
def generate_response(image, text):
    """Answer a question about an uploaded image using the loaded model.

    Args:
        image: Image delivered by the Gradio image input (declared with
            ``type="pil"``); ``None`` when nothing was uploaded.
        text: Question text from the Gradio textbox.

    Returns:
        Whatever ``model.chat`` returns for the image/question pair.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Show a readable message in the UI instead of failing inside the
        # transform when it tries to read attributes of None.
        raise gr.Error("请先上传图片。")

    # preprocess_image only accepts (file_path, image_size); the previous
    # ``dynamic=True`` keyword raised TypeError on every request.
    pixel_values = preprocess_image(image)

    # Sampling is disabled; at most 2048 new tokens are generated.
    generation_config = dict(max_new_tokens=2048, do_sample=False)

    # The prompt is the '<image>' tag on its own line followed by the question.
    question = '<image>\n' + text

    return model.chat(tokenizer, pixel_values, question, generation_config)
49
+
50
# Input widgets, in the order generate_response expects them (image, text).
image_input = gr.Image(type="pil", label="上传图片")
question_input = gr.Textbox(lines=2, placeholder="输入你的问题...")

# Wire the widgets to generate_response; the result is shown as plain text.
iface = gr.Interface(
    fn=generate_response,
    inputs=[image_input, question_input],
    outputs="text",
    title="Llava-QW",
    description="上传一张图片并输入你的问题,模型将生成相应的回答。",
)

# Start the web app as soon as the script runs.
iface.launch()