benchang1110 committed on
Commit
2ccd128
1 Parent(s): 37e7bdb

Upload processor

Browse files
Files changed (1) hide show
  1. processing_taivisionlm.py +24 -24
processing_taivisionlm.py CHANGED
@@ -289,28 +289,28 @@ class TaiVisionProcessor(ProcessorMixin):
289
 
290
 
291
 
292
- if __name__ == '__main__':
293
- from configuration_taivisionlm import TaiVisionLMConfig
294
- import transformers
295
- import torch
296
- config = TaiVisionLMConfig.from_pretrained("./")
297
- preprocessor = transformers.SiglipImageProcessor.from_pretrained("google/siglip-base-patch16-224")
298
- preprocessor.image_seq_length = config.num_image_tokens
299
- tokenizer = transformers.AutoTokenizer.from_pretrained("benchang1110/Taiwan-tinyllama-v1.0-chat")
300
- processor = TaiVisionProcessor(tokenizer=tokenizer, image_processor=preprocessor)
301
- processor.save_pretrained("./")
302
 
303
- from PIL import Image
304
- import requests
305
- processor = TaiVisionProcessor.from_pretrained("./")
306
- url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg"
307
- image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
308
- text = "Hello< what is your name?"
309
- suffix = "I am fine, thank you."
310
- inputs = processor(text=text,suffix=suffix,images=image, return_tensors="pt",padding="max_length",max_length=512)
311
- print(inputs['attention_mask'].shape)
312
- print(inputs['input_ids'].shape)
313
- print(inputs['token_type_ids'].shape)
314
- # print number of 0 in token_type_ids
315
- print(torch.sum(inputs['token_type_ids']==0))
316
- print(inputs)
 
289
 
290
 
291
 
292
+ # if __name__ == '__main__':
293
+ # from configuration_taivisionlm import TaiVisionLMConfig
294
+ # import transformers
295
+ # import torch
296
+ # config = TaiVisionLMConfig.from_pretrained("./")
297
+ # preprocessor = transformers.SiglipImageProcessor.from_pretrained("google/siglip-base-patch16-224")
298
+ # preprocessor.image_seq_length = config.num_image_tokens
299
+ # tokenizer = transformers.AutoTokenizer.from_pretrained("benchang1110/Taiwan-tinyllama-v1.0-chat")
300
+ # processor = TaiVisionProcessor(tokenizer=tokenizer, image_processor=preprocessor)
301
+ # processor.save_pretrained("./")
302
 
303
+ # from PIL import Image
304
+ # import requests
305
+ # processor = TaiVisionProcessor.from_pretrained("./")
306
+ # url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg"
307
+ # image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
308
+ # text = "Hello< what is your name?"
309
+ # suffix = "I am fine, thank you."
310
+ # inputs = processor(text=text,suffix=suffix,images=image, return_tensors="pt",padding="max_length",max_length=512)
311
+ # print(inputs['attention_mask'].shape)
312
+ # print(inputs['input_ids'].shape)
313
+ # print(inputs['token_type_ids'].shape)
314
+ # # print number of 0 in token_type_ids
315
+ # print(torch.sum(inputs['token_type_ids']==0))
316
+ # print(inputs)