# app.py -- author: AamirAli123
# Commit message: "Create app.py"
# Commit: a5f38d9 (verified)
import PIL.Image
import gradio as gr
import base64
import os
import google.generativeai as genai
from dotenv import load_dotenv
# Load environment variables via python-dotenv before the API key is read
# below (presumably from a local .env file -- confirm against deployment).
load_dotenv()
# Set Google API key
# The key is read from the GOOGLE_API_KEY environment variable; os.getenv
# returns None when the variable is not set.
GOOGLe_API_KEY = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key = GOOGLe_API_KEY)
# Create the Model
# txt_model answers text-only prompts; vis_model answers prompts that come
# with an image (both are used by llm_response).
txt_model = genai.GenerativeModel('gemini-pro')
vis_model = genai.GenerativeModel('gemini-pro-vision')
# Image to Base 64 Converter
def image_to_base64(image_path):
    """Return the bytes of the file at ``image_path`` as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The file contents, base64-encoded and decoded to a ``str``.
    """
    with open(image_path, 'rb') as img:
        raw_bytes = img.read()
    # b64encode yields bytes; callers embed the result in text, so hand
    # back a str.
    return base64.b64encode(raw_bytes).decode('utf-8')
# Function that takes User Inputs and displays it on ChatUI
def query_message(history,txt,img):
    """Append the user's turn to the chat history.

    Args:
        history: Chat history as a list of ``(user, bot)`` tuples. It is
            extended in place; ``None`` is treated as an empty history.
        txt: Text typed by the user.
        img: Path of the uploaded image, or a falsy value when no image
            was supplied.

    Returns:
        The history list with one new ``(user_message, None)`` entry. When
        an image is given, the message is the text followed by a Markdown
        image whose source is a base64 data URL of the file.
    """
    if history is None:
        history = []

    if not img:
        history.append((txt, None))
        return history

    # Function-scope import: only the image path needs it.
    import mimetypes

    # Label the data URL with the file's actual image type (PNG, WebP, ...)
    # instead of always claiming JPEG; keep JPEG as the fallback when the
    # extension is unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(img)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # Named `encoded` (not `base64`) so the imported module is not shadowed.
    encoded = image_to_base64(img)
    data_url = f"data:{mime_type};base64,{encoded}"
    history.append((f"{txt} ![]({data_url})", None))
    return history
# Function that takes User Inputs, generates Response and displays on Chat UI
def llm_response(history,text,img):
    """Generate the model's reply and append it to the chat history.

    Args:
        history: Chat history as a list of ``(user, bot)`` tuples; it is
            extended in place.
        text: The user's prompt text.
        img: Path of the uploaded image, or a falsy value for a text-only
            prompt.

    Returns:
        A tuple ``(history, update)``: the history with a new
        ``(None, reply_text)`` entry, and ``gr.update(value="")``, which the
        event wiring applies to the text box to clear it.
    """
    if not img:
        response = txt_model.generate_content(text)
    else:
        # Open the image in a context manager so its file handle is released
        # as soon as the request has been made, instead of lingering until
        # garbage collection.
        with PIL.Image.open(img) as image:
            response = vis_model.generate_content([text, image])

    history += [(None, response.text)]
    return history, gr.update(value = "")
def image_to_base64(image_path):
    """
    Load a file from disk and encode its raw bytes as base64 text.
    Args:
    image_path (str): Location of the image file to encode.
    Returns:
    str: Base64 text for the file's bytes.
    """
    with open(image_path, "rb") as image_file:
        payload = image_file.read()
    encoded = base64.b64encode(payload)
    # b64encode returns bytes; convert to str for embedding in markup.
    return str(encoded, "utf-8")
# Encode the logo image into base64
# NOTE: this runs at import time and uses a relative path, so
# "pixelpk_logo.png" is resolved against the current working directory.
logo_base64 = image_to_base64("pixelpk_logo.png")
# HTML header for the page: the logo embedded as a base64 PNG data URL,
# followed by the title and a one-line description.
markdown_content = f"""
<img src="data:image/png;base64,{logo_base64}" alt="Feedback Logo" style="width: 100px; height: 100px; margin-top: 10px;" />
<h1>MultiModal Chatbot</h1>
<p style="margin-top: 5px;">Multimodal chatbot is designed to chat with text and images.</p>
"""
# Custom stylesheet handed to gr.Blocks below; centers <h1> headings.
css = """
h1 {
text-align: center;
display:block;
}
"""
# Interface Code
with gr.Blocks(gr.themes.Monochrome(), css = css) as app:
    # Page header: logo, title and description, centered.
    gr.Markdown(f"<center>{markdown_content}</center>")

    # Image upload sits beside the chat window.
    with gr.Row():
        image_box = gr.Image(type = "filepath")
        chatbot = gr.Chatbot(scale = 3)

    text_box = gr.Textbox(
        placeholder="Enter text and press enter, or upload an image",
        container=False,
    )
    btn = gr.Button("Submit")

    # The Submit button and pressing Enter in the text box run the same
    # two-step chain: first show the user's message in the chat, then
    # append the model's reply and clear the text box.
    for trigger in (btn.click, text_box.submit):
        clicked = trigger(
            query_message,
            [chatbot, text_box, image_box],
            [chatbot],
        ).then(
            llm_response,
            [chatbot, text_box, image_box],
            [chatbot, text_box],
        )

app.queue()
app.launch(share = True, debug = True)