File size: 1,251 Bytes
1d55ece
 
eb65490
b3dfb2c
4473c1b
eb65490
c8ff0d2
 
b3dfb2c
f65f370
 
 
4e4112c
4473c1b
 
 
 
c8ff0d2
eb65490
 
 
 
 
 
c8ff0d2
eb65490
857f666
 
d979bb9
 
1d55ece
 
 
 
 
d979bb9
 
 
 
1d55ece
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import transformers
import torch
import einops
import streamlit as st
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): DEVICE is defined but never used below — model placement comes from
# device_map="auto" / HF defaults. Wire this in explicitly or remove it; kept for
# backward compatibility with any external reader of this constant.
DEVICE = "cuda:0"

# Page header.
st.title("BEST Story Teller...Ever!")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])


@st.cache_resource
def _load_captioner():
    """Load the moondream2 vision-language model and tokenizer once per session.

    Streamlit reruns the entire script on every widget interaction; without
    caching, both heavy models would be re-initialized on each rerun.

    Returns:
        tuple: (model, tokenizer) for image captioning.
    """
    model_id = "vikhyatk/moondream2"
    revision = "2024-05-08"  # pinned revision for reproducible behavior
    model = AutoModelForCausalLM.from_pretrained(
        model_id, trust_remote_code=True, revision=revision
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
    return model, tokenizer


@st.cache_resource
def _load_story_generator():
    """Build the Llama-3 text-generation pipeline once per session."""
    return transformers.pipeline(
        "text-generation",
        model="meta-llama/Meta-Llama-3-8B",
        model_kwargs={"torch_dtype": torch.bfloat16},
        device_map="auto",
    )


if uploaded_file is not None:
    image = Image.open(uploaded_file)
    # NOTE(review): use_column_width is deprecated in recent Streamlit;
    # prefer use_container_width once the minimum Streamlit version allows.
    st.image(image, caption='Uploaded Image.', use_column_width=True)

    caption_model, caption_tokenizer = _load_captioner()

    # Describe the uploaded image with the vision-language model.
    enc_image = caption_model.encode_image(image)
    answer = caption_model.answer_question(
        enc_image, "Describe this image in a few sentences.", caption_tokenizer
    )

    st.header("Image Description Generated.")
    st.success(answer)

    # Renamed from `pipeline` to avoid shadowing transformers.pipeline.
    story_generator = _load_story_generator()

    # BUG FIX: the pipeline returns a list of dicts like
    # [{'generated_text': '...'}]; the original passed that raw structure to
    # st.success, showing a Python repr instead of the story. Extract the text.
    outputs = story_generator(
        "Generate a short story based on image description: " + answer
    )
    story = outputs[0]["generated_text"]

    st.header("Story Generated.")
    st.success(story)