Teamcoadminka / app.py
Illioa's picture
Update app.py
b5b47eb verified
raw
history blame contribute delete
No virus
2.14 kB
import gradio as gr
import base64
import json
import os
from PIL import Image
import io
from langchain_openai import ChatOpenAI
from langchain_community.callbacks import get_openai_callback
import replicate
# Fail fast with a readable message when a required API key is absent.
# The previous `os.environ[k] = os.environ.get(k)` was a no-op when the
# variable was set and raised an opaque TypeError (None is not a str) when
# it was not, so the check below keeps the "abort at startup" behaviour but
# names the missing variable.
for _required_var in ("OPENAI_API_KEY", "REPLICATE_API_TOKEN"):
    if _required_var not in os.environ:
        raise RuntimeError(
            f"Missing required environment variable: {_required_var}"
        )

# Initialize the LLM model
llm = ChatOpenAI(model='gpt-3.5-turbo-0125', temperature=0.2)

# Default prompt template. It is rendered with `str.format`, receiving the
# image caption as the `image_description` field, and is also shown as the
# editable default in the UI textbox.
with open("Resource/instructions copy.txt", "r", encoding="utf-8") as f:
    instructions = f.read()
# Image modes that Pillow's JPEG encoder can write directly. Anything else
# (for example RGBA or P, as produced by PNG/GIF uploads) makes
# `Image.save(..., format="JPEG")` raise OSError and must be converted first.
_JPEG_WRITABLE_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})


def image_to_base64(image):
    """Encode a PIL image as a base64 string of its JPEG bytes.

    Args:
        image: A PIL ``Image`` (anything exposing ``mode``, ``convert`` and
            ``save`` in the same way works).

    Returns:
        The JPEG-encoded image as a base64 ``str`` (no data-URI prefix).

    Images whose mode cannot be written as JPEG are converted to RGB before
    encoding; transparency information is not preserved by that conversion.
    Images in a JPEG-compatible mode are encoded as-is.
    """
    if image.mode not in _JPEG_WRITABLE_MODES:
        image = image.convert("RGB")

    # Serialize into an in-memory buffer rather than a temporary file.
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
def captions_image(image):
    """Request a caption for *image* from the BLIP model on Replicate.

    The image is sent inline as a JPEG data URI built with
    ``image_to_base64``. Whatever ``replicate.run`` returns is handed back
    to the caller unchanged.
    """
    model_ref = "salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746"

    # Build the data URI first so the request payload reads declaratively.
    encoded = image_to_base64(image)
    payload = {
        "task": "image_captioning",
        "image": f"data:image/jpeg;base64,{encoded}",
    }

    return replicate.run(model_ref, input=payload)
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present."""
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned

    # Drop the opening fence line, including an optional language tag such
    # as "```json". If everything is on one line, just drop the backticks.
    header, newline, body = cleaned.partition("\n")
    if not newline:
        body = header[3:]

    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def generate_prompt_gpt_3_turbo(image, instructions):
    """Caption an image, feed the caption to the LLM and parse its JSON reply.

    Args:
        image: PIL image to caption.
        instructions: Prompt template rendered with ``str.format``; it
            receives the caption as the ``image_description`` field. Literal
            braces in the template must be doubled (``{{`` / ``}}``).

    Returns:
        A 3-tuple ``(response_dict, image_description, cb)``: the parsed
        JSON reply, the caption returned by ``captions_image``, and the
        callback object yielded by ``get_openai_callback`` (per the
        LangChain docs it records token usage for the call).

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON even
            after removing a surrounding Markdown code fence.
        KeyError, IndexError, ValueError: From ``str.format`` when the
            template references other fields or has unbalanced braces.
    """
    # Get image captions
    image_description = captions_image(image)

    # Format the prompt with the image description
    prompt = instructions.format(image_description=image_description)

    with get_openai_callback() as cb:
        # Invoke LLM model and get response
        response = llm.invoke(prompt).content

    # Chat models frequently wrap JSON in ```json ... ``` fences, which
    # json.loads rejects; strip the fence before parsing.
    response_dict = json.loads(_strip_code_fence(response))
    return response_dict, image_description, cb
# Define the Gradio interface: an image plus an editable prompt template go
# in; the parsed JSON, the image caption and the OpenAI callback summary
# come out (in the order returned by generate_prompt_gpt_3_turbo).
iface = gr.Interface(
    fn=generate_prompt_gpt_3_turbo,
    inputs=[gr.Image(type="pil"), gr.Textbox(value=instructions)],
    outputs=[gr.JSON(), gr.Textbox(), gr.Textbox()],
    title="Image to Text Generator",
    description="Upload an image to generate descriptive text based on the image.",
)

# Run the interface only when executed as a script, so that importing this
# module (e.g. for reuse of the helpers) does not start a blocking server.
if __name__ == "__main__":
    iface.launch(share=True)