import os

import streamlit as st
import torch
from huggingface_hub import InferenceClient
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Load the MiniCPM-V-2 vision-language model and its tokenizer.
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2', trust_remote_code=True)
model.eval()

# Run on the GPU in half precision when available; otherwise fall back to the CPU in full precision.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device=device, dtype=torch.float16 if device == 'cuda' else torch.float32)
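
# Note: Streamlit re-executes this script on every interaction, so the model above is reloaded
# each time. Wrapping the loading code in a function decorated with @st.cache_resource is one
# way to avoid that.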

# Hugging Face Inference API client used for the text-generation step.
api_key = os.getenv("HF_API_KEY", "your_api_key_here")
client = InferenceClient(api_key=api_key)
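
# The token is read from the HF_API_KEY environment variable (falling back to the placeholder
# above); set it before launching, e.g. export HF_API_KEY=<your Hugging Face access token>.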

# Streamlit UI: image upload and preview.
st.title("Image Questioning and Content Generation App")
st.write("Upload an image and ask a question. The model will respond with a description, and you can generate a song or story based on the response.")

uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
if uploaded_image:
    image = Image.open(uploaded_image).convert('RGB')
    st.image(image, caption="Uploaded Image", use_column_width=True)

question = st.text_input("Ask a question about the image")
if question and uploaded_image:
    msgs = [{'role': 'user', 'content': question}]

    # Ask MiniCPM-V-2 about the uploaded image.
    with st.spinner("Processing..."):
        res, context, _ = model.chat(
            image=image,
            msgs=msgs,
            context=None,
            tokenizer=tokenizer,
            sampling=True,
            temperature=0.7
        )

    st.write("Model's response:", res)

    # Offer follow-up content generation based on the model's answer.
    option = st.selectbox("Generate content based on the response", ["Choose...", "Write a Song", "Write a Story"])

    if option != "Choose...":
        if option == "Write a Song":
            messages = [{"role": "user", "content": f"Write a song about the following: {res}"}]
        elif option == "Write a Story":
            messages = [{"role": "user", "content": f"Write a story about the following: {res}"}]

        st.write(f"Generating {option.lower()}...")

        # Stream the completion from the Hugging Face Inference API.
        stream = client.chat.completions.create(
            model="meta-llama/Llama-3.2-3B-Instruct",
            messages=messages,
            max_tokens=500,
            stream=True
        )

        # Accumulate the streamed chunks and update a single placeholder as text arrives
        # (some chunks carry no content, so guard against None before appending).
        placeholder = st.empty()
        generated_text = ""
        for chunk in stream:
            if chunk.choices[0].delta.content:
                generated_text += chunk.choices[0].delta.content
                placeholder.write(generated_text)
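
# To run locally (assuming this script is saved as app.py):
#     streamlit run app.py
# Requires streamlit, torch, transformers, pillow, and huggingface_hub to be installed.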