---
license: mit
language:
- zh
tags:
- gpt2
- vit
---
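
A Chinese image-captioning model that pairs a ViT image encoder with a GPT2 text decoder through `transformers`' `VisionEncoderDecoderModel`: it takes an image and generates a short Chinese caption.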

# Inference
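
First, import the model, tokenizer, and image-processor classes used below: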

```python
from transformers import (VisionEncoderDecoderModel,
                          AutoTokenizer,
                          ViTImageProcessor)
import torch
from PIL import Image
```
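
Load the image processor, tokenizer, and model weights, then move the model to the GPU if one is available: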

```python
vision_encoder_decoder_model_name_or_path = "yuanzhoulvpi/vit-gpt2-image-chinese-captioning"  # or a local checkpoint, e.g. "vit-gpt2-image-chinese-captioning/checkpoint-3200"

processor = ViTImageProcessor.from_pretrained(vision_encoder_decoder_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(vision_encoder_decoder_model_name_or_path)
model = VisionEncoderDecoderModel.from_pretrained(vision_encoder_decoder_model_name_or_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
```
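
Define a prediction helper that opens each image, converts it to RGB, preprocesses the batch, and decodes the beam-search output into caption strings: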

```python
max_length = 16
num_beams = 4
gen_kwargs = {"max_length": max_length, "num_beams": num_beams}


def predict_step(image_paths):
    images = []
    for image_path in image_paths:
        i_image = Image.open(image_path)
        if i_image.mode != "RGB":
            i_image = i_image.convert(mode="RGB")
        images.append(i_image)

    # Preprocess the batch into pixel tensors and move them to the model's device
    pixel_values = processor(images=images, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)

    # Generate caption token ids with beam search, then decode them to text
    output_ids = model.generate(pixel_values, **gen_kwargs)
    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    preds = [pred.strip() for pred in preds]
    return preds


predict_step(['bigdata/image_data/train-1000200.jpg'])
```
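
`predict_step` accepts a list of paths and reads the module-level `gen_kwargs` at call time, so you can caption several images at once and tune generation in one place. A minimal sketch, with placeholder file names and illustrative settings (not the card's defaults):

```python
# Placeholder paths -- replace with your own images
image_paths = ["photos/example1.jpg", "photos/example2.jpg"]

# Wider beam and longer max_length for more detailed captions (illustrative)
gen_kwargs = {"max_length": 32, "num_beams": 8}

for path, caption in zip(image_paths, predict_step(image_paths)):
    print(f"{path}: {caption}")
```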