# Gemini-vision / app.py
# Hugging Face Spaces page header captured along with the file (not program code):
#   aakash0563's picture | Update app.py | 8b4a847 verified
#   raw | history blame | No virus | 3.17 kB
import google.generativeai as genai
from PIL import Image
import gradio as gr
import numpy as np
import os
# The Gemini API key is read from the GOOGLE_API_KEY environment variable;
# the value is None when that variable is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Hand the key to the google.generativeai client and create the vision model
# object that the request handler below shares across calls.
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel("gemini-pro-vision")
def process_image_and_text(image, text):
    """Ask the Gemini vision model about an image, optionally guided by a prompt.

    Args:
        image: The picture coming from the Gradio "image" input. Anything
            ``np.asarray`` accepts is supported (for example a NumPy array or
            a PIL image). ``None`` means no image was supplied.
        text: Optional prompt from the textbox. When it is empty or contains
            only whitespace, a default "describe this image" prompt is used.

    Returns:
        The text of the model's answer, or a short explanatory message when
        no image was supplied or the model produced no text.
    """
    # Without an image there is nothing to send; the previous code crashed
    # here with AttributeError on ``image.data``.
    if image is None:
        return "Please upload an image first."

    # A blank prompt carries no instruction, so fall back to the default one.
    if text and text.strip():
        prompt = text
    else:
        prompt = "Tell me about this image in bulletin format"

    # Let Pillow infer the mode from the array shape and then normalise to
    # RGB, instead of forcing 'RGB' onto data that may be grayscale or RGBA.
    pixels = np.asarray(image).astype("uint8")
    picture = Image.fromarray(pixels).convert("RGB")

    response = model.generate_content([prompt, picture])
    try:
        return response.text
    except ValueError as err:
        # NOTE(review): the google-generativeai docs describe ``.text`` as
        # raising ValueError when the response holds no text part (e.g. a
        # blocked prompt) -- confirm against the installed version.
        return f"The model did not return any text: {err}"
# Build the web UI: an image upload and a prompt textbox as inputs, and a
# single textbox that shows whatever process_image_and_text returns.
iface = gr.Interface(
    fn=process_image_and_text,
    inputs=["image", "textbox"],
    outputs="textbox",
    title="Image and Text Processor",
)

# Start the app with Gradio's debug and share options switched on.
iface.launch(share=True, debug=True)
# import google.generativeai as genai
# import os
# import os
# from pdf2image import convert_from_path
# from PIL import Image
# import pdf2image
# import numpy as np
# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# # Now you can use hugging_face_api_key in your code
# genai.configure(api_key=GOOGLE_API_KEY)
# import gradio as gr
# # print(llm.predict("Who is the PM of India?"))
# model = genai.GenerativeModel('gemini-pro-vision')
# def process_image_and_text(images):
# response = {}
# for i,image in enumerate(images):
# # # Assuming image is the input from Gradio
# # image_array = np.asarray(image.data) # Convert memoryview to NumPy array
# # image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype
# response = model.generate_content(["You are act as a tutor Solve all the question in the image in step by step: ", image])
# response[i] = response.text
# return response
# def input_pdf_setup(uploaded_pdf):
# # Convert PDF pages to images
# images = convert_from_path(uploaded_pdf, dpi=200)
# return images
# def extract_answer(uploaded_pdf):
# """Retrieves answers from processed images and presents them clearly."""
# images = input_pdf_setup(uploaded_pdf)
# responses = process_image_and_text(images=images)
# # Present results in a user-friendly format
# answers = []
# for i, response in enumerate(responses.values()):
# answers.append(f"Answer for question {i+1}:\n {response}")
# return "\n".join(answers)
# # Create Gradio interface
# iface = gr.Interface(
# fn=extract_answer,
# inputs="file",
# outputs="text",
# title="Question-Answering with Gemstone.ai",
# description="Upload a PDF containing questions, and get step-by-step answers!",
# allow_flagging=True,
# )
# # Launch the Gradio application
# iface.launch(share=True, debug=True)