File size: 1,802 Bytes
11b81b8 b4627e7 4119334 11b81b8 41e5269 cc56bee d17454c cc56bee 41e5269 cc56bee 41e5269 cc56bee 41e5269 cc56bee 41e5269 cc56bee 41e5269 cc56bee 41e5269 cc56bee 41e5269 cc56bee 7f67ff1 cc56bee 41e5269 cc56bee 41e5269 cc56bee 28cede3 41e5269 cc56bee 74edd87 cc56bee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
---
license: apache-2.0
language:
- en
library_name: transformers
pipeline_tag: image-text-to-text
tags:
- art
base_model: microsoft/Florence-2-base
datasets:
- kadirnar/fluxdev_controlnet_16k
---
```
pip install -q torch==2.4.0 datasets flash_attn timm einops
```
```python
from transformers import AutoModelForCausalLM, AutoProcessor, AutoConfig
import torch
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# trust_remote_code=True is required here because the checkpoint is loaded
# with modelling/processing code taken from the model repository.
model = AutoModelForCausalLM.from_pretrained(
    "gokaygokay/Florence-2-Flux",
    trust_remote_code=True,
)
# Move the weights to the chosen device and switch to evaluation mode.
model = model.to(device).eval()
processor = AutoProcessor.from_pretrained(
    "gokaygokay/Florence-2-Flux",
    trust_remote_code=True,
)
# Function to run the model on an example
def run_example(task_prompt, text_input, image):
    """Run the model on one image and return the post-processed answer.

    Args:
        task_prompt: Task token placed at the start of the prompt; it is also
            passed as ``task`` to the processor's post-processing step.
        text_input: Text appended directly after ``task_prompt``.
        image: Image object exposing ``mode``, ``convert``, ``width`` and
            ``height`` (e.g. a PIL image).

    Returns:
        The value produced by ``processor.post_process_generation`` for the
        first decoded sequence.
    """
    full_prompt = task_prompt + text_input

    # The processor is always fed an RGB image; other modes are converted.
    rgb_image = image if image.mode == "RGB" else image.convert("RGB")

    batch = processor(
        text=full_prompt,
        images=rgb_image,
        return_tensors="pt",
    ).to(device)

    generation_options = {
        "max_new_tokens": 1024,
        "num_beams": 3,
        "repetition_penalty": 1.10,
    }
    output_ids = model.generate(
        input_ids=batch["input_ids"],
        pixel_values=batch["pixel_values"],
        **generation_options,
    )

    # Special tokens are kept in the decoded text that is handed to
    # post-processing.
    decoded = processor.batch_decode(output_ids, skip_special_tokens=False)
    return processor.post_process_generation(
        decoded[0],
        task=task_prompt,
        image_size=(rgb_image.width, rgb_image.height),
    )
from PIL import Image
import requests
import copy
# Sample image used to demonstrate the captioning call.
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"

# Bound the wait and fail on an HTTP error status, so a failed download is
# reported as such instead of surfacing later as an image-decoding error.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

answer = run_example("<DESCRIPTION>", "Describe this image in great detail.", image)
# The parsed answer is looked up under the same task token that was passed in.
final_answer = answer["<DESCRIPTION>"]
print(final_answer)
```