from dotenv import load_dotenv load_dotenv() # load all env. variables import streamlit as st import os import google.generativeai as genai import replicate from PIL import Image import time # set api key genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) # REPLICATE_API_TOKEN=os.getenv("REPLICATE_API_TOKEN") # initialize our models vis_model = genai.GenerativeModel("gemini-pro-vision") langugae_model = genai.GenerativeModel("gemini-pro") # function to load gemini pro vision model and get responses def get_gemini_response(input, image): # if there present any text input besides image, then generate both if (input != "") and (image != ""): response = vis_model.generate_content([input, image]) elif (input != "") and (image == ""): response = langugae_model.generate_content(input) else: response = vis_model.generate_content(image) return response.text def stream_data(prompt, image): sentences = get_gemini_response(prompt, image).split(". ") for sentence in sentences: for word in sentence.split(): yield word + " " time.sleep(0.02) # initialize our streamlit app st.set_page_config( page_title="Multimodal Content Generation", page_icon="⚡️", layout="wide" ) # give title st.sidebar.title(":rainbow[MULTIMODAL CONTENT GENERATION]") st.sidebar.write("Built by [jaiminjariwala](https://github.com/jaiminjariwala) 😀") st.sidebar.divider() # Multimodals Options multimodal_options = st.sidebar.radio( "**Select What To Do...**", options=["Chat and Image Summarization", "Text 2 Image"], index=0, horizontal=False, ) st.sidebar.divider() # ======================================================================================= if multimodal_options == "Chat and Image Summarization": # New chat button, to get the fresh chat page if st.sidebar.button("Get **New Chat** Fresh Page"): st.session_state["messages"] = [] # Clear chat history st.experimental_rerun() # Trigger page reload # create image upload option in sidebar with st.expander("**Wanna Upload an Image?**"): uploaded_file = st.file_uploader("Choose an image for **Image Summarizer** task...", type=["jpg", "jpeg", "png"]) image="" if uploaded_file is not None: image=Image.open(uploaded_file) st.image(image, caption="Uploaded Image.", use_column_width=True) # Create a container to hold the entire chat history chat_container = st.container() # initialize chat history if "messages" not in st.session_state: st.session_state.messages = [] # Display chat messages from history on app rerun for message in st.session_state.messages: with chat_container: with st.chat_message(message["role"]): st.markdown(message["content"]) # create input prompt (textbox) if prompt := st.chat_input("Type here..."): # display user message in chat message container with chat_container: with st.chat_message("user"): st.markdown(prompt) # add user message to chat history st.session_state.messages.append({"role" : "user", "content" : prompt}) # display assistant message in chat message container with chat_container: with st.chat_message("assistant"): should_format_as_code = any(keyword in prompt.lower() for keyword in ["code", "python", "java", "javascript", "c++", "c", "program", "react", "reactjs", "node", "nodejs", "html", "css", "javascript", "js"]) if should_format_as_code: st.code(get_gemini_response(prompt, image)) else: st.write_stream(stream_data(prompt, image)) # Add assistant response to chat history st.session_state.messages.append({"role": "assistant", "content": get_gemini_response(prompt, image)}) # ============================================================================= def generate_and_display_image(submitted: bool, width: int, height: int, num_outputs: int, scheduler: str, num_inference_steps: int, prompt_strength: float, prompt: str): """ Generates an image using the specified prompt and parameters. """ if REPLICATE_API_TOKEN.startswith('r8_') and submitted and prompt: with st.status('Generating your image...', expanded=True) as status: try: # Only call the API if the "Submit" button was pressed if submitted: all_images = [] # List to store all generated images # calling the replicate API output = replicate.run( "stability-ai/sdxl:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b", input={ "prompt": prompt, "width": width, "height": height, "num_outputs": num_outputs, "scheduler": "K_EULER", "num_inference_steps": num_inference_steps, "guidance_scale": 7.5, "prompt_stregth": prompt_strength, "negative_prompt": "the absolute worst quality, distorted features", "refine": "expert_ensemble_refiner", "high_noise_frac": 0.8 } ) if output: st.toast( 'Your image has been generated!', icon='😍') # Save generated image to session state st.session_state.generated_image = output # Displaying the image for image in st.session_state.generated_image: with st.container(): st.image(image, caption="Generated Image ❄️", use_column_width=True) # Add image to the list all_images.append(image) # Save all generated images to session state st.session_state.all_images = all_images except replicate.exceptions.ReplicateError as e: st.error(f"Error generating image: {e}") # if not submitted elif not prompt: st.toast("Please input some prompt!", icon="⚠️") def refine_output(): """ Provides options for users to refine output parameters and returns them. """ with st.expander("**Refine your output if you want...**"): width = st.number_input("Width of output image", value=1024) height = st.number_input("Height of output image", value=1024) num_outputs = st.slider("Number of images to output", value=1, min_value=1, max_value=4) scheduler = st.selectbox('Scheduler', ('DDIM', 'DPMSolverMultistep', 'HeunDiscrete', 'KarrasDPM', 'K_EULER_ANCESTRAL', 'K_EULER', 'PNDM')) num_inference_steps = st.slider( "Number of denoising steps", value=50, min_value=1, max_value=500) prompt_strength = st.slider( "Prompt strength when using img2img/inpaint (1.0 corresponds to full destruction of information in image)", value=0.8, max_value=1.0, step=0.1) # prompt input prompt = st.text_input("Enter your prompt for the image:", value="Dog and cat dancing on moon") # Submit button to trigger image generation submitted = st.button("Generate") return submitted, width, height, num_outputs, scheduler, num_inference_steps, prompt_strength, prompt # Prompt input and image generation logic if multimodal_options == "Text 2 Image": REPLICATE_API_TOKEN=st.sidebar.text_input( "Enter your REPLICATE API TOKEN", placeholder="Paste token here...", type="password" ) os.environ["REPLICATE_API_TOKEN"]=REPLICATE_API_TOKEN if not REPLICATE_API_TOKEN.startswith('r8_'): st.warning('Please enter your REPLICATE API KEY in **Sidebar**!', icon='⚠') width, height, num_outputs, scheduler, num_inference_steps, prompt_strength, prompt, submitted = refine_output() generate_and_display_image(width, height, num_outputs, scheduler, num_inference_steps, prompt_strength, prompt, submitted) # ============================================================================