import os

import streamlit as st
import torch
from huggingface_hub import InferenceClient
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Load the MiniCPM-V-2 vision-language model and its tokenizer.
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
model.eval()

# Run on the GPU in half precision when available; otherwise fall back to the CPU in full precision.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device=device, dtype=torch.float16 if device == 'cuda' else torch.float32)
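
# Note: Streamlit re-executes this script on every interaction, so the model above is reloaded
# each time. Wrapping the loading code in a function decorated with @st.cache_resource is one
# way to avoid that.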

# Hugging Face Inference API client used for the text-generation step.
api_key = os.getenv("HF_API_KEY", "your_api_key_here")
client = InferenceClient(api_key=api_key)
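
# The token is read from the HF_API_KEY environment variable (falling back to the placeholder
# above); set it before launching, e.g. export HF_API_KEY=<your Hugging Face access token>.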

# Streamlit UI: image upload and preview.
st.title("Image Questioning and Content Generation App")
st.write("Upload an image and ask a question. The model will respond with a description, and you can generate a song or story based on the response.")

uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
if uploaded_image:
    image = Image.open(uploaded_image).convert('RGB')
    st.image(image, caption="Uploaded Image", use_column_width=True)

question = st.text_input("Ask a question about the image")
if question and uploaded_image:
    msgs = [{'role': 'user', 'content': question}]

    # Ask MiniCPM-V-2 about the uploaded image.
    with st.spinner("Processing..."):
        res, context, _ = model.chat(
            image=image,
            msgs=msgs,
            context=None,
            tokenizer=tokenizer,
            sampling=True,
            temperature=0.7
        )

    st.write("Model's response:", res)

    # Offer follow-up content generation based on the model's answer.
    option = st.selectbox("Generate content based on the response", ["Choose...", "Write a Song", "Write a Story"])

    if option != "Choose...":
        if option == "Write a Song":
            messages = [{"role": "user", "content": f"Write a song about the following: {res}"}]
        elif option == "Write a Story":
            messages = [{"role": "user", "content": f"Write a story about the following: {res}"}]

        st.write(f"Generating {option.lower()}...")

        # Stream the completion from the Hugging Face Inference API.
        stream = client.chat.completions.create(
            model="meta-llama/Llama-3.2-3B-Instruct",
            messages=messages,
            max_tokens=500,
            stream=True
        )

        # Accumulate the streamed chunks and update a single placeholder as text arrives
        # (some chunks carry no content, so guard against None before appending).
        placeholder = st.empty()
        generated_text = ""
        for chunk in stream:
            if chunk.choices[0].delta.content:
                generated_text += chunk.choices[0].delta.content
                placeholder.write(generated_text)
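
# To run locally (assuming this script is saved as app.py):
#     streamlit run app.py
# Requires streamlit, torch, transformers, pillow, and huggingface_hub to be installed.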