import gradio as gr
import base64
import json
import os
import io
import replicate
from langchain_openai import ChatOpenAI
from langchain_community.callbacks import get_openai_callback

# Both API keys must be provided via the environment; fail early with a
# clear message instead of crashing later inside the API clients.
for key in ("OPENAI_API_KEY", "REPLICATE_API_TOKEN"):
    if not os.environ.get(key):
        raise RuntimeError(f"{key} environment variable is not set")

# Initialize the LLM
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.2)

# Load the prompt template; it is expected to contain an {image_description}
# placeholder (see the note at the bottom of this file).
with open("Resource/instructions copy.txt", "r") as f:
    instructions = f.read()


def image_to_base64(image):
    # Serialize a PIL image to a base64-encoded JPEG string
    buffered = io.BytesIO()
    image.convert("RGB").save(buffered, format="JPEG")  # JPEG has no alpha channel
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def captions_image(image):
    # Encode the image as a data URI
    image_base64 = f"data:image/jpeg;base64,{image_to_base64(image)}"
    # Call the Replicate API (Salesforce BLIP) for image captioning
    return replicate.run(
        "salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746",
        input={"task": "image_captioning", "image": image_base64},
    )


def generate_prompt_gpt_3_turbo(image, instructions):
    # Caption the image with BLIP
    image_description = captions_image(image)
    # Fill the {image_description} placeholder in the prompt template
    prompt = instructions.format(image_description=image_description)
    # Track token usage and cost while invoking the model
    with get_openai_callback() as cb:
        response = llm.invoke(prompt).content
    # The prompt asks for JSON, but the model may occasionally return plain text
    try:
        response_dict = json.loads(response)
    except json.JSONDecodeError:
        response_dict = {"raw_response": response}
    return response_dict, image_description, str(cb)


# Define the Gradio interface
iface = gr.Interface(
    fn=generate_prompt_gpt_3_turbo,
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(value=instructions, label="Prompt template"),
    ],
    outputs=[
        gr.JSON(label="Generated text"),
        gr.Textbox(label="Image caption"),
        gr.Textbox(label="Token usage"),
    ],
    title="Image to Text Generator",
    description="Upload an image to generate descriptive text based on the image.",
)

# Run the interface
iface.launch(share=True)
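
# ---------------------------------------------------------------------------
# A minimal sketch of what "Resource/instructions copy.txt" is assumed to
# look like: the code above only requires that it contain an
# {image_description} placeholder and ask the model for JSON output. The
# wording below is hypothetical, not the actual file contents.
#
#   You are given a caption of an image: {image_description}
#   Return a JSON object with two keys: "title" (a short headline for the
#   image) and "description" (two to three descriptive sentences).
# ---------------------------------------------------------------------------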