"""Streamlit app: caption an uploaded image with BLIP, restyle the caption
with GPT-3.5 in a user-chosen personality, and read it aloud with gTTS."""

import os
import io
from io import BytesIO

import streamlit as st
from PIL import Image
from gtts import gTTS
from openai import OpenAI
from transformers import pipeline


@st.cache_resource
def _load_captioner():
    """Load the BLIP image-to-text pipeline once per server process.

    Without caching, Streamlit would re-instantiate the (large) model on
    every widget interaction, since the whole script reruns each time.
    """
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")


# For explaining what is going on in the image
img_nar = _load_captioner()

# Requires OPENAI_API_KEY to be set in the environment.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

st.header("Image Narrator")

uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

# Conversation history must persist across Streamlit reruns.
if 'history' not in st.session_state:
    st.session_state['history'] = []

personality = st.text_input("Enter a personality")

# Fallback caption used when no image has been uploaded yet.
image_narration = "No narration given"

# Check if an image has been uploaded
if uploaded_image is not None:
    # Decode the uploaded bytes into a PIL image for the captioning pipeline.
    bytes_data = uploaded_image.getvalue()
    pil_image = Image.open(io.BytesIO(bytes_data))

    # Now, use the PIL image with the pipeline
    image_narration = img_nar(pil_image)

    # use_container_width replaces the deprecated use_column_width parameter.
    st.image(pil_image, caption='Uploaded Image.', use_container_width=True)

    # The pipeline returns a list of dicts; keep only the caption text.
    image_narration = image_narration[0]["generated_text"]


def update_and_get_narration(personality, user_input):
    """Ask GPT-3.5 to re-narrate *user_input* in the given *personality*.

    Appends the user message and the assistant's reply to the session-state
    chat history so follow-up narrations keep context.

    Returns:
        The model's narration, or an instructional message when either
        *personality* or *user_input* is empty.
    """
    if personality and user_input:
        st.session_state['history'].append({"role": "user", "content": user_input})
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system",
                 "content": f"You reiterate what is said to you but narrate it like a {personality}."}
            ] + st.session_state['history'],
        )
        gpt_response = response.choices[0].message.content
        st.session_state['history'].append({"role": "assistant", "content": gpt_response})
        return gpt_response
    else:
        return "Please enter both a personality and some image classification text."
# Generate a fresh narration on demand; otherwise show the most recent one.
if st.button('Narrate'):
    spoken_text = update_and_get_narration(personality, image_narration)
    st.write(spoken_text)

    # Synthesize the narration to MP3 fully in memory, then embed a player.
    speech = gTTS(text=spoken_text, lang='en')
    mp3_buffer = BytesIO()
    speech.write_to_fp(mp3_buffer)
    mp3_buffer.seek(0)
    st.audio(mp3_buffer, format='audio/mp3', start_time=0)
else:
    if st.session_state['history']:
        st.write(st.session_state['history'][-1]['content'])
    else:
        st.write("Narration will appear here.")