# Spaces: Running  (page-header residue captured with the source; not part of the program)
import os

import google.generativeai as genai
import gradio as gr
import numpy as np
from PIL import Image

# The Gemini API key is taken from the environment rather than hard-coded.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# Module-level handle to the vision-capable Gemini model.
model = genai.GenerativeModel("gemini-pro-vision")
def process_image_and_text(image, text):
    """Ask the Gemini vision model about an image, optionally guided by a prompt.

    Args:
        image: Pixel data for the picture; anything ``np.asarray`` can turn
            into an array (assumed to be an H x W x 3 array as delivered by
            the Gradio image input -- confirm against the interface setup).
            ``None`` means no image was supplied.
        text: Optional user prompt. When it is empty or whitespace-only, a
            default "describe this image" prompt is sent instead.

    Returns:
        The text of the model's reply, or a short notice asking for an image
        when none was supplied.
    """
    if image is None:
        # Nothing was uploaded: answer in the output box instead of failing
        # with an AttributeError on ``None``.
        return "Please upload an image."

    if text and text.strip():
        prompt = text
    else:
        prompt = "Tell me about this image in bulletin format"

    # Pillow needs 8-bit channel values to build an RGB image.
    pixels = np.asarray(image).astype("uint8")
    picture = Image.fromarray(pixels, "RGB")

    response = model.generate_content([prompt, picture])
    return response.text
# Web UI: an image and a free-text prompt go in, the model's reply comes out.
iface = gr.Interface(
    fn=process_image_and_text,
    inputs=["image", "textbox"],
    outputs="textbox",
    title="Image and Text Processor",
)

# Start the app with debug mode on and a share link requested.
iface.launch(debug=True, share=True)
# --- Disabled alternate version: step-by-step answers for questions in an uploaded PDF ---
# import google.generativeai as genai
# import os
# import os
# from pdf2image import convert_from_path
# from PIL import Image
# import pdf2image
# import numpy as np
# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# # Now you can use hugging_face_api_key in your code
# genai.configure(api_key=GOOGLE_API_KEY)
# import gradio as gr
# # print(llm.predict("Who is the PM of India?"))
# model = genai.GenerativeModel('gemini-pro-vision')
# def process_image_and_text(images):
#     response = {}
#     for i,image in enumerate(images):
#         # # Assuming image is the input from Gradio
#         # image_array = np.asarray(image.data) # Convert memoryview to NumPy array
#         # image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype
#         response = model.generate_content(["You are act as a tutor Solve all the question in the image in step by step: ", image])
#         response[i] = response.text
#     return response
# def input_pdf_setup(uploaded_pdf):
#     # Convert PDF pages to images
#     images = convert_from_path(uploaded_pdf, dpi=200)
#     return images
# def extract_answer(uploaded_pdf):
#     """Retrieves answers from processed images and presents them clearly."""
#     images = input_pdf_setup(uploaded_pdf)
#     responses = process_image_and_text(images=images)
#     # Present results in a user-friendly format
#     answers = []
#     for i, response in enumerate(responses.values()):
#         answers.append(f"Answer for question {i+1}:\n {response}")
#     return "\n".join(answers)
# # Create Gradio interface
# iface = gr.Interface(
#     fn=extract_answer,
#     inputs="file",
#     outputs="text",
#     title="Question-Answering with Gemstone.ai",
#     description="Upload a PDF containing questions, and get step-by-step answers!",
#     allow_flagging=True,
# )
# # Launch the Gradio application
# iface.launch(share=True, debug=True)