BK-Lee
/

TroL-3.8B

+---
+license: mit
+---
+# Two steps only need.
+First step.
+```shell
+git clone https://github.com/ByungKwanLee/TroL
+bash install
+```
+Second step.
+```python
+import torch
+from config import *
+from PIL import Image
+from utils.utils import *
+import torch.nn.functional as F
+from trol.load_trol import load_trol
+from torchvision.transforms.functional import pil_to_tensor
+# model selection
+link = "TroL-3.8B" # [Select One] 'TroL-1.8B' | 'TroL-3.8B' | 'TroL-7B'
+# User prompt
+prompt_type="with_image" # Select one option "text_only", "with_image"
+img_path='figures/demo.png'
+question="What is the troll doing? Provide the detail in the image and imagine what the event happens."
+# loading model
+model, tokenizer = load_trol(link=link)
+# cpu -> gpu
+for param in model.parameters():
+    if not param.is_cuda:
+        param.data = param.to('cuda:0')
+# prompt type -> input prompt
+image_token_number = None
+if prompt_type == 'with_image':
+    # Image Load
+    image = pil_to_tensor(Image.open(img_path).convert("RGB"))
+    if not "3.8B" in link:
+        image_token_number = 1225
+        image = F.interpolate(image.unsqueeze(0), size=(490, 490), mode='bicubic').squeeze(0)
+    inputs = [{'image': image, 'question': question}]
+elif prompt_type=='text_only':
+    inputs = [{'question': question}]
+# Generate
+with torch.inference_mode():
+    _inputs = model.eval_process(inputs=inputs,
+                                 data='demo',
+                                 tokenizer=tokenizer,
+                                 device='cuda:0',
+                                 img_token_number=image_token_number)
+    generate_ids = model.generate(**_inputs, max_new_tokens=256, use_cache=True)
+    response = output_filtering(tokenizer.batch_decode(generate_ids, skip_special_tokens=False)[0], model)
+print(response)
+```
+So easy Let's say TroL!