Claudz163's picture
added main
f9d8d7e
import streamlit as st
from transformers import pipeline
from huggingface_hub import InferenceClient
from PIL import Image
import os
def initialize():
    """Set up per-session state the first time the script runs in a session.

    Reads the ``HUGGINGFACE_TOKEN`` environment variable and stores it,
    together with an ``InferenceClient`` built from it, in
    ``st.session_state``. Later reruns of the script in the same session
    return immediately.
    """
    if 'initialized' in st.session_state:  # Initialize only once
        return

    print("Initializing...")
    api_key = os.getenv("HUGGINGFACE_TOKEN")
    st.session_state['api_key'] = api_key
    st.session_state['client'] = InferenceClient(api_key=api_key)
    # Set the flag last: if anything above raises, the next rerun retries
    # initialization instead of skipping it and leaving 'client' missing.
    st.session_state['initialized'] = True
def _get_captioner():
    """Return the image-captioning pipeline, building it once per session.

    The pipeline is kept in ``st.session_state`` so that Streamlit reruns
    (triggered by every widget interaction) do not reload the model.
    """
    if 'captioner' not in st.session_state:
        st.session_state['captioner'] = pipeline(
            "image-to-text", model="Salesforce/blip-image-captioning-large"
        )
    return st.session_state['captioner']


def main():
    """Render the page: caption an uploaded image, then restyle the caption.

    The uploaded image is captioned by the image-to-text pipeline; the
    caption is embedded in a prompt for the selected character and sent to
    the chat-completion client stored in ``st.session_state['client']``.
    The streamed reply is collected and written to the page.
    """
    initialize()
    st.header("Character Captions")
    st.write("Have a character caption any image you upload!")
    character = st.selectbox("Choose a character", ["rapper", "shrek", "unintelligible", "cookie monster"])
    uploaded_img = st.file_uploader("Upload an image")
    if uploaded_img is None:
        return

    # Open Image
    try:
        image = Image.open(uploaded_img)
    except OSError:
        # The uploader accepts any file type; Pillow raises an OSError
        # subclass when the data is not a readable image.
        st.error("Could not read the uploaded file as an image.")
        return
    st.image(image)

    # Get caption from image
    caption_result = _get_captioner()(image)
    caption = caption_result[0]['generated_text']

    # Pass the caption to a character prompt
    character_prompts = {
        "rapper": f"Describe this caption like you're a rapper: {caption}.",
        "shrek": f"Describe this caption like you're Shrek: {caption}.",
        "unintelligible": f"Describe this caption in a way that makes no sense: {caption}.",
        "cookie monster": f"Describe this caption like you're cookie monster: {caption}.",
    }
    messages = [
        {"role": "user", "content": character_prompts[character]}
    ]

    # Pass to Llama for character output regarding image caption
    stream = st.session_state['client'].chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct",
        messages=messages,
        max_tokens=500,
        stream=True
    )

    # Streamed chunks may have no choices or a delta whose content is None;
    # skip those instead of concatenating None onto a str.
    pieces = []
    for chunk in stream:
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            pieces.append(text)
    st.write("".join(pieces))
# Run the app only when this file is executed as the entry script.
if "__main__" == __name__:
    main()