nielsr HF staff committed on
Commit
5bbae38
1 Parent(s): 3f6c7a7

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -15
README.md CHANGED
@@ -50,28 +50,25 @@ Though not the focus of this model, we did evaluate it on standard image underst
50
 
51
  You can load the model and perform inference as follows:
52
  ```python
53
- from transformers import FuyuForCausalLM, AutoTokenizer, FuyuProcessor, FuyuImageProcessor
54
  from PIL import Image
55
 
56
- # load model, tokenizer, and processor
57
- pretrained_path = "adept/fuyu-8b"
58
- tokenizer = AutoTokenizer.from_pretrained(pretrained_path)
 
59
 
60
- image_processor = FuyuImageProcessor()
61
- processor = FuyuProcessor(image_processor=image_processor, tokenizer=tokenizer)
62
-
63
- model = FuyuForCausalLM.from_pretrained(pretrained_path, device_map="cuda:0")
64
-
65
- # test inference
66
  text_prompt = "Generate a coco-style caption.\n"
67
  image_path = "bus.png" # https://huggingface.co/adept-hf-collab/fuyu-8b/blob/main/bus.png
68
- image_pil = Image.open(image_path)
69
 
70
- model_inputs = processor(text=text_prompt, images=[image_pil], device="cuda:0")
71
- for k, v in model_inputs.items():
72
- model_inputs[k] = v.to("cuda:0")
73
 
74
- generation_output = model.generate(**model_inputs, max_new_tokens=7)
 
75
  generation_text = processor.batch_decode(generation_output[:, -7:], skip_special_tokens=True)
76
  assert generation_text == ['A bus parked on the side of a road.']
77
  ```
 
50
 
51
  You can load the model and perform inference as follows:
52
  ```python
53
+ from transformers import FuyuProcessor, FuyuForCausalLM
54
  from PIL import Image
55
 
56
+ # load model and processor
57
+ model_id = "adept/fuyu-8b"
58
+ processor = FuyuProcessor.from_pretrained(model_id)
59
+ model = FuyuForCausalLM.from_pretrained(model_id, device_map="cuda:0")
60
 
61
+ # prepare inputs for the model
 
 
 
 
 
62
  text_prompt = "Generate a coco-style caption.\n"
63
  image_path = "bus.png" # https://huggingface.co/adept-hf-collab/fuyu-8b/blob/main/bus.png
64
+ image = Image.open(image_path)
65
 
66
+ inputs = processor(text=text_prompt, images=image, return_tensors="pt")
67
+ for k, v in inputs.items():
68
+ inputs[k] = v.to("cuda:0")
69
 
70
+ # autoregressively generate text
71
+ generation_output = model.generate(**inputs, max_new_tokens=7)
72
  generation_text = processor.batch_decode(generation_output[:, -7:], skip_special_tokens=True)
73
  assert generation_text == ['A bus parked on the side of a road.']
74
  ```