visheratin committed
Commit 9c984a5
1 parent: 8022f13

Delete processing_llava.py

Files changed (1)
  1. processing_llava.py +0 -101
processing_llava.py DELETED
@@ -1,101 +0,0 @@
-# coding=utf-8
-# Copyright 2023 The HuggingFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Processor class for Llava.
-"""
-
-
-from typing import List, Optional, Union
-
-from transformers.feature_extraction_utils import BatchFeature
-from transformers.image_utils import ImageInput
-from transformers.tokenization_utils_base import (
-    PaddingStrategy,
-    PreTokenizedInput,
-    TextInput,
-    TruncationStrategy,
-)
-from transformers.utils import TensorType
-import torch
-from open_clip.transform import PreprocessCfg, image_transform_v2
-
-
-class OpenCLIPImageProcessor:
-    def __init__(self, config):
-        cfg = PreprocessCfg(**config)
-        transform = image_transform_v2(cfg=cfg, is_train=False)
-        self.transform = transform
-
-    def __call__(self, image, return_tensors):
-        if isinstance(image, list):
-            outputs = []
-            for item in image:
-                outputs.append(self.transform(item))
-            return {
-                "pixel_values": torch.tensor(outputs),
-            }
-        output = self.transform(image)
-        return {
-            "pixel_values": output.unsqueeze(0),
-        }
-
-    @property
-    def model_input_names(self):
-        return ["pixel_values"]
-
-
-class LlavaProcessor:
-    def __init__(self, image_processor: OpenCLIPImageProcessor, tokenizer):
-        self.image_processor = image_processor
-        self.tokenizer = tokenizer
-
-    def __call__(
-        self,
-        text: Union[
-            TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]
-        ] = None,
-        images: ImageInput = None,
-        padding: Union[bool, str, PaddingStrategy] = False,
-        truncation: Union[bool, str, TruncationStrategy] = None,
-        max_length=None,
-        return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
-    ) -> BatchFeature:
-        if images is not None:
-            pixel_values = self.image_processor(images, return_tensors=return_tensors)[
-                "pixel_values"
-            ]
-        else:
-            pixel_values = None
-        text_inputs = self.tokenizer(
-            text,
-            return_tensors=return_tensors,
-            padding=padding,
-            truncation=truncation,
-            max_length=max_length,
-        )
-
-        return BatchFeature(data={**text_inputs, "pixel_values": pixel_values})
-
-    def batch_decode(self, *args, **kwargs):
-        return self.tokenizer.batch_decode(*args, **kwargs)
-
-    def decode(self, *args, **kwargs):
-        return self.tokenizer.decode(*args, **kwargs)
-
-    @property
-    def model_input_names(self):
-        tokenizer_input_names = self.tokenizer.model_input_names
-        image_processor_input_names = self.image_processor.model_input_names
-        return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
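One note for readers of the deleted code: in `OpenCLIPImageProcessor.__call__`, the list branch calls `torch.tensor(outputs)` on a list of multi-element image tensors, which raises `ValueError: only one element tensors can be converted to Python scalars` in PyTorch. `torch.stack` is the usual way to batch per-image CHW tensors into one BCHW tensor. A minimal corrected sketch of that branch (not part of this commit; the helper name is illustrative):

    import torch

    def batch_pixel_values(transform, images):
        # Apply the open_clip transform per image, then stack the resulting
        # CHW tensors into one BCHW batch. torch.stack is required here;
        # torch.tensor cannot convert a list of multi-element tensors.
        return {"pixel_values": torch.stack([transform(img) for img in images])}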
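For context, this is roughly how the two classes were meant to be wired together. A sketch assuming the classes from the diff above are in scope, with default `PreprocessCfg` values standing in for the model's real preprocessing config and a placeholder tokenizer checkpoint:

    from dataclasses import asdict

    from PIL import Image
    from transformers import AutoTokenizer
    from open_clip.transform import PreprocessCfg

    # Default PreprocessCfg values are a stand-in; the repo's config would
    # normally supply this dict of preprocessing settings.
    image_processor = OpenCLIPImageProcessor(asdict(PreprocessCfg()))

    # Placeholder checkpoint; the deleted file did not pin a tokenizer.
    tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")

    processor = LlavaProcessor(image_processor, tokenizer)

    image = Image.new("RGB", (336, 336))  # synthetic stand-in image
    batch = processor(text="<image>\nDescribe the picture.", images=image)
    print(batch["input_ids"].shape)     # (1, sequence_length)
    print(batch["pixel_values"].shape)  # (1, 3, H, W) via unsqueeze(0)

A single image goes through the `unsqueeze(0)` path, so `pixel_values` always carries a leading batch dimension, and `return_tensors` defaults to PyTorch tensors throughout.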