BabyLM-community
/

babylm-multimodal-baseline-git-old

Image-Text-to-Text

Model card Files Files and versions

babylm-multimodal-baseline-git-old / processor_git.py

momergul's picture

Upload processor_git.py with huggingface_hub

95eb69c verified 7 months ago

history blame contribute delete

2.12 kB

	from transformers import ProcessorMixin, AutoProcessor
	from transformers.models.auto.processing_auto import AutoProcessor
	from transformers.processing_utils import ProcessorMixin
	from transformers.tokenization_utils_base import BatchEncoding
	import json
	import os

	class GITProcessor(ProcessorMixin):
	    """
	    Custom processor that bundles an image processor and a tokenizer.

	    Text is routed to ``self.tokenizer`` and images to
	    ``self.image_processor``; the two outputs are merged into a single
	    ``BatchEncoding``. Decoding helpers are forwarded to the tokenizer.
	    """

	    # Class-level declarations consumed by ``ProcessorMixin`` to know which
	    # sub-components this processor owns and which auto classes load them.
	    attributes = ["image_processor", "tokenizer"]
	    image_processor_class = "AutoImageProcessor"
	    tokenizer_class = "AutoTokenizer"

	    def __init__(self, image_processor, tokenizer):
	        """
	        Store the two sub-processors via ``ProcessorMixin.__init__``.

	        Args:
	            image_processor: Component called on ``images`` in ``__call__``.
	            tokenizer: Component called on ``text`` in ``__call__`` and used
	                by ``decode`` / ``batch_decode``.
	        """
	        super().__init__(image_processor, tokenizer)

	    def __call__(self, text=None, images=None, **kwargs):
	        """
	        Main processing method that handles both text and images.

	        Args:
	            text: Text input(s) to tokenize.
	            images: Image input(s) to process.
	            **kwargs: Additional arguments; the same keyword arguments are
	                forwarded to both the tokenizer and the image processor.

	        Returns:
	            ``BatchEncoding`` holding the tokenizer outputs under their
	            original keys, ``pixel_values`` from the image processor, and
	            any other image-processor output under ``image_<key>``.

	        Raises:
	            ValueError: If neither ``text`` nor ``images`` is given.
	        """
	        if text is None and images is None:
	            raise ValueError("You need to specify either text or images")

	        encoding = {}

	        # Process text if provided
	        if text is not None:
	            encoding.update(self.tokenizer(text, **kwargs))

	        # Process images if provided
	        if images is not None:
	            # NOTE(review): tokenizer-only options (e.g. padding/truncation)
	            # are also passed here; presumably the image processor tolerates
	            # unknown kwargs -- confirm against the image processor in use.
	            image_encoding = self.image_processor(images, **kwargs)
	            # Keep "pixel_values" as-is; prefix every other key so it cannot
	            # collide with a tokenizer output of the same name.
	            for key, value in image_encoding.items():
	                target_key = key if key == "pixel_values" else f"image_{key}"
	                encoding[target_key] = value

	        return BatchEncoding(encoding)

	    def batch_decode(self, *args, **kwargs):
	        """
	        Delegate batch decoding to the tokenizer.

	        All positional and keyword arguments (for example
	        ``skip_special_tokens=True``) are forwarded unchanged to
	        ``self.tokenizer.batch_decode`` and its result is returned.
	        """
	        return self.tokenizer.batch_decode(*args, **kwargs)

	    def decode(self, *args, **kwargs):
	        """
	        Delegate decoding to the tokenizer.

	        All positional and keyword arguments (for example
	        ``skip_special_tokens=True``) are forwarded unchanged to
	        ``self.tokenizer.decode`` and its result is returned.
	        """
	        return self.tokenizer.decode(*args, **kwargs)