File size: 1,110 Bytes
c378480 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
from PIL import Image
from transformers import AutoTokenizer
from pydantic import BaseModel
from enum import Enum
from moonline import Moonline
def main():
    """Ask the Moondream2 model to describe ``example.png`` and print the answer.

    Steps, in order: declare the answer schema, build the prompt, load the
    tokenizer and the Moonline model at a pinned revision, encode the image,
    build an FSM from the schema, query the model and print its answer.

    NOTE(review): the FSM returned by ``generate_fsm`` is presumably what
    restricts the generated text to the ``ExampleModel`` shape -- confirm
    against the Moonline documentation.
    """

    class Mood(Enum):
        # Closed set of mood values the answer may carry.
        sad = "sad"
        happy = "happy"
        angry = "angry"
        neutral = "neutral"

    class ExampleModel(BaseModel):
        # Shape of the answer requested from the model.
        description: str
        mood: Mood

    # Assembled from explicit pieces so the prompt text does not depend on
    # how this function happens to be indented in the source file.
    prompt = (
        "\n"
        "Your job is to describe the image.\n"
        "Please answer in json with the following format: "
        f"{ExampleModel.__annotations__}\n"
    )

    image_path = "example.png"
    model_id = "vikhyatk/moondream2"
    revision = "2024-04-02"  # pinned so tokenizer and weights stay in sync

    tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
    # NOTE(review): `.to()` is called without a device or dtype, exactly as
    # before; confirm whether a target device was meant to be passed here.
    moonline = Moonline.from_pretrained(
        model_id,
        revision=revision,
    ).to()
    moonline.eval()

    # Close the image file as soon as it has been encoded instead of leaving
    # the handle open for the rest of the run.
    with Image.open(image_path) as image:
        image_embeds = moonline.encode_image(image)

    fsm = moonline.generate_fsm(ExampleModel, tokenizer)
    answer = moonline.answer_question(image_embeds, prompt, tokenizer, fsm)
    print(f"answer: {answer}")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()