Update README.md
Browse files
README.md
CHANGED
@@ -26,21 +26,36 @@ We refined the model on the dataset with descriptions and movie posters by russi
|
|
26 |
- **Repository:** [github.com/slivka83](https://github.com/slivka83/)
|
27 |
- **Demo [optional]:** [@MPC_project_bot](https://t.me/MPC_project_bot)
|
28 |
|
29 |
-
#
|
30 |
|
31 |
-
|
|
|
32 |
|
33 |
-
|
|
|
|
|
34 |
|
35 |
-
|
|
|
|
|
36 |
|
37 |
-
|
38 |
|
39 |
-
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
|
|
|
42 |
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
# Bias, Risks, and Limitations
|
46 |
|
|
|
26 |
- **Repository:** [github.com/slivka83](https://github.com/slivka83/)
|
27 |
- **Demo [optional]:** [@MPC_project_bot](https://t.me/MPC_project_bot)
|
28 |
|
29 |
+
# How to use
|
30 |
|
31 |
+
```python
|
32 |
+
import torch
from PIL import Image
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel
|
33 |
|
34 |
+
tokenizer = AutoTokenizer.from_pretrained("dumperize/movie-picture-captioning")
|
35 |
+
feature_extractor = ViTFeatureExtractor.from_pretrained("dumperize/movie-picture-captioning")
|
36 |
+
model = VisionEncoderDecoderModel.from_pretrained("dumperize/movie-picture-captioning")
|
37 |
|
38 |
+
max_length = 128
|
39 |
+
num_beams = 4
|
40 |
+
gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
|
41 |
|
42 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
|
43 |
|
44 |
+
image_path = 'path/to/image.jpg'
|
45 |
+
image = Image.open(image_path)
|
46 |
+
image = image.resize((224, 224))
|
47 |
+
if image.mode != "RGB":
|
48 |
+
    image = image.convert(mode="RGB")
|
49 |
|
50 |
+
pixel_values = feature_extractor(images=[image], return_tensors="pt").pixel_values
|
51 |
+
pixel_values = pixel_values.to(device)
|
52 |
|
53 |
+
output_ids = model.generate(pixel_values, **gen_kwargs)
|
54 |
+
|
55 |
+
preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
56 |
+
print([pred.strip() for pred in preds])
|
57 |
+
|
58 |
+
```
|
59 |
|
60 |
# Bias, Risks, and Limitations
|
61 |
|