|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import bisect |
|
from typing import TYPE_CHECKING, List, Sequence, Tuple |
|
|
|
from ...extras.packages import is_pillow_available |
|
|
|
|
|
if is_pillow_available(): |
|
from PIL import Image |
|
|
|
|
|
if TYPE_CHECKING: |
|
from numpy.typing import NDArray |
|
from PIL.Image import Image as ImageObject |
|
from transformers import ProcessorMixin |
|
from transformers.image_processing_utils import BaseImageProcessor |
|
|
|
|
|
def search_for_fit(numbers: Sequence[int], capacity: int) -> int:
    r"""
    Locates the largest number that does not exceed the given capacity.

    The lookup is a binary search, so ``numbers`` must already be sorted in ascending order.

    Returns the index of that number (the rightmost one among equal values), or -1 if nothing fits.
    """
    # bisect gives the insertion point to the right of every element <= capacity,
    # hence the element just before that point is the largest one that fits.
    insertion_point = bisect.bisect(numbers, capacity)
    if insertion_point == 0:
        return -1

    return insertion_point - 1
|
|
|
|
|
def greedy_knapsack(numbers: List[int], capacity: int) -> List[List[int]]:
    r"""
    An efficient greedy algorithm with binary search for the knapsack problem.

    Opens one knapsack at a time and keeps adding the largest remaining number that
    still fits; when nothing fits anymore, the knapsack is closed and a new one is opened.

    Args:
        numbers: item sizes. On success the list is sorted in place and emptied by this call.
        capacity: maximum total size of a single knapsack.

    Returns:
        A list of knapsacks, each being the list of sizes packed into it.

    Raises:
        ValueError: if any number exceeds ``capacity``. Such a number can never be packed,
            and without this check the packing loop would never terminate.
    """
    # Validate before mutating anything, so a rejected call leaves ``numbers`` untouched.
    if numbers and max(numbers) > capacity:
        raise ValueError(
            "Cannot pack number {} into a knapsack of capacity {}.".format(max(numbers), capacity)
        )

    numbers.sort()  # ascending order is required by the binary search below
    knapsacks = []

    while numbers:
        current_knapsack = []
        remaining_capacity = capacity

        while True:
            # Index of the largest remaining number <= remaining_capacity, or -1 if none
            # (the same lookup that search_for_fit performs).
            index = bisect.bisect(numbers, remaining_capacity) - 1
            if index == -1:
                break

            remaining_capacity -= numbers[index]
            current_knapsack.append(numbers.pop(index))

        knapsacks.append(current_knapsack)

    return knapsacks
|
|
|
|
|
def get_pixel_values(images: Sequence["ImageObject"], processor: "ProcessorMixin") -> "NDArray":
    r"""
    Turns visual inputs into pixel values. (only the first image is used for now)

    When ``images`` is empty, a blank white 100x100 RGB image is processed instead.
    The image is passed to the ``image_processor`` attribute of ``processor`` and the
    first entry of the resulting "pixel_values" is returned.
    """
    # NOTE(review): the return annotation says NDArray while return_tensors="pt" is requested - confirm.
    image_processor: "BaseImageProcessor" = getattr(processor, "image_processor")
    if len(images) == 0:
        image = Image.new("RGB", (100, 100), (255, 255, 255))  # white placeholder
    else:
        image = images[0]

    features = image_processor(image, return_tensors="pt")
    return features["pixel_values"][0]
|
|
|
|
|
def get_paligemma_token_type_ids(input_len: int, processor: "ProcessorMixin") -> List[int]:
    r"""
    Builds the paligemma token type ids used for computing loss.

    The first ``processor.image_seq_length`` positions are marked with 0 and the
    remaining ``input_len - image_seq_length`` positions with 1.
    """
    num_image_tokens = getattr(processor, "image_seq_length")
    num_other_tokens = input_len - num_image_tokens

    token_type_ids = [0] * num_image_tokens
    token_type_ids.extend([1] * num_other_tokens)
    return token_type_ids
|
|
|
|
|
def infer_seqlen(source_len: int, target_len: int, cutoff_len: int) -> Tuple[int, int]:
    r"""
    Computes the real sequence length after truncation by the cutoff_len.

    Args:
        source_len: length of the source sequence (expected to be non-negative).
        target_len: length of the target sequence (expected to be non-negative).
        cutoff_len: maximum combined length of source and target.

    Returns:
        A tuple ``(new_source_len, new_target_len)`` whose sum does not exceed
        ``cutoff_len`` for non-negative inputs, and whose members never exceed
        the original lengths.
    """
    if target_len * 2 < cutoff_len:
        # Short target: it keeps its full length and the source absorbs the truncation.
        max_target_len = cutoff_len
    elif source_len * 2 < cutoff_len:
        # Short source: the target receives whatever the source leaves over.
        max_target_len = cutoff_len - source_len
    else:
        # Both are long: share the budget in proportion to the original lengths.
        # Integer arithmetic keeps the share exact; going through float division can
        # land just below an integer and lose a token once truncated.
        total_len = source_len + target_len
        # total_len is 0 only when both lengths are 0, where no token can be kept anyway.
        max_target_len = cutoff_len * target_len // total_len if total_len != 0 else 0

    new_target_len = min(max_target_len, target_len)
    max_source_len = max(cutoff_len - new_target_len, 0)
    new_source_len = min(max_source_len, source_len)
    return new_source_len, new_target_len
|
|