Gemini-vision / app.py
aakash0563's picture
Update app.py
8b4a847 verified
import google.generativeai as genai
from PIL import Image
import gradio as gr
import numpy as np
import os
# Name of the Gemini model used for image + text prompts.
_MODEL_NAME = 'gemini-pro-vision'

# Read the Google API key from the environment (None when the variable is
# unset) and hand it to the google.generativeai client.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# Module-level model instance used by process_image_and_text.
model = genai.GenerativeModel(_MODEL_NAME)
def process_image_and_text(image, text):
    """Ask the Gemini vision model about an image, optionally guided by text.

    Args:
        image: Image supplied by the Gradio "image" input. It is converted
            with ``np.asarray``, cast to ``uint8`` and wrapped in a PIL image
            in "RGB" mode (assumes an H x W x 3 array -- TODO confirm against
            the Gradio component configuration).
        text: Prompt from the Gradio textbox. When it is ``None``, empty or
            only whitespace, a default "describe this image" prompt is used.

    Returns:
        The ``text`` attribute of the response returned by
        ``model.generate_content``.

    Raises:
        ValueError: If ``image`` is ``None`` (no image was provided).
    """
    if image is None:
        # Fail with a clear message instead of an AttributeError raised from
        # deep inside the conversion code.
        raise ValueError("No image provided: please upload an image first.")

    default_prompt = "Tell me about this image in bulletin format"
    # Whitespace-only input carries no instruction, so treat it like an
    # empty prompt; otherwise forward the user's text unchanged.
    prompt = text if text and text.strip() else default_prompt

    # The conversion is identical for both prompts, so do it exactly once.
    pixels = np.asarray(image).astype('uint8')
    pil_image = Image.fromarray(pixels, 'RGB')

    response = model.generate_content([prompt, pil_image])
    return response.text
# Gradio UI: an image input and a textbox input feed
# process_image_and_text, whose return value is shown in a textbox.
iface = gr.Interface(
    fn=process_image_and_text,
    inputs=["image", "textbox"],
    outputs="textbox",
    title="Image and Text Processor",
)

# Launch the app with Gradio's debug and share options enabled.
iface.launch(share=True, debug=True)
# import google.generativeai as genai
# import os
# import os
# from pdf2image import convert_from_path
# from PIL import Image
# import pdf2image
# import numpy as np
# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# # Configure the Gemini client with the Google API key read from the environment
# genai.configure(api_key=GOOGLE_API_KEY)
# import gradio as gr
# # print(llm.predict("Who is the PM of India?"))
# model = genai.GenerativeModel('gemini-pro-vision')
# def process_image_and_text(images):
# response = {}
# for i,image in enumerate(images):
# # # Assuming image is the input from Gradio
# # image_array = np.asarray(image.data) # Convert memoryview to NumPy array
# # image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype
# response = model.generate_content(["You are act as a tutor Solve all the question in the image in step by step: ", image])
# response[i] = response.text
# return response
# def input_pdf_setup(uploaded_pdf):
# # Convert PDF pages to images
# images = convert_from_path(uploaded_pdf, dpi=200)
# return images
# def extract_answer(uploaded_pdf):
# """Retrieves answers from processed images and presents them clearly."""
# images = input_pdf_setup(uploaded_pdf)
# responses = process_image_and_text(images=images)
# # Present results in a user-friendly format
# answers = []
# for i, response in enumerate(responses.values()):
# answers.append(f"Answer for question {i+1}:\n {response}")
# return "\n".join(answers)
# # Create Gradio interface
# iface = gr.Interface(
# fn=extract_answer,
# inputs="file",
# outputs="text",
# title="Question-Answering with Gemstone.ai",
# description="Upload a PDF containing questions, and get step-by-step answers!",
# allow_flagging=True,
# )
# # Launch the Gradio application
# iface.launch(share=True, debug=True)