Spaces:

aakash0563
/

Gemini-vision

Running

App Files Files Community

Gemini-vision / app.py

aakash0563

Update app.py

8b4a847 verified 8 months ago

raw

history blame

No virus

3.17 kB

	import google.generativeai as genai
	from PIL import Image
	import gradio as gr
	import numpy as np
	import os

	# Read the Gemini API key from the environment; the value is None when the
	# GOOGLE_API_KEY variable is not set.
	GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

	# Hand the key to the google.generativeai client library.
	genai.configure(api_key=GOOGLE_API_KEY)

	# Module-level model handle used by process_image_and_text.
	model = genai.GenerativeModel("gemini-pro-vision")
	def process_image_and_text(image, text):
	    """Ask the Gemini vision model about an image and return its reply.

	    Args:
	        image: Image supplied by the Gradio "image" input. It must expose a
	            ``.data`` buffer that NumPy can read as RGB pixel data
	            (presumably a NumPy array -- confirm against the Gradio
	            version in use). ``None`` means no image was provided.
	        text: Optional prompt. When it is ``None``, empty or only
	            whitespace, a default "describe this image" prompt is used.

	    Returns:
	        The text of the model's response, or a short notice asking for an
	        image when none was provided.
	    """
	    if image is None:
	        # Without this guard a missing image fails with AttributeError on
	        # ``image.data``; return a readable message instead.
	        return "Please upload an image."

	    # Fall back to the default prompt when the user typed nothing useful.
	    has_prompt = bool(text and text.strip())
	    prompt = text if has_prompt else "Tell me about this image in bulletin format"

	    # Convert the memoryview to a NumPy array, then to a PIL image for the model.
	    pixels = np.asarray(image.data)
	    pil_image = Image.fromarray(pixels.astype("uint8"), "RGB")

	    response = model.generate_content([prompt, pil_image])
	    return response.text


	# Build the web UI: an image and a text prompt go in, the model's text
	# reply comes out.
	iface = gr.Interface(
	    fn=process_image_and_text,
	    inputs=["image", "textbox"],
	    outputs="textbox",
	    title="Image and Text Processor",
	)

	# Start the Gradio app; debug and share are both switched on.
	iface.launch(debug=True, share=True)



	# import google.generativeai as genai
	# import os
	# import os
	# from pdf2image import convert_from_path
	# from PIL import Image
	# import pdf2image
	# import numpy as np
	# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

	# # Now you can use hugging_face_api_key in your code

	# genai.configure(api_key=GOOGLE_API_KEY)


	# import gradio as gr
	# # print(llm.predict("Who is the PM of India?"))
	# model = genai.GenerativeModel('gemini-pro-vision')
	# def process_image_and_text(images):
	# response = {}
	# for i,image in enumerate(images):
	# # # Assuming image is the input from Gradio
	# # image_array = np.asarray(image.data) # Convert memoryview to NumPy array
	# # image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype
	# response = model.generate_content(["You are act as a tutor Solve all the question in the image in step by step: ", image])
	# response[i] = response.text
	# return response

	# def input_pdf_setup(uploaded_pdf):
	# # Convert PDF pages to images
	# images = convert_from_path(uploaded_pdf, dpi=200)
	# return images



	# def extract_answer(uploaded_pdf):
	# """Retrieves answers from processed images and presents them clearly."""
	# images = input_pdf_setup(uploaded_pdf)
	# responses = process_image_and_text(images=images)

	# # Present results in a user-friendly format
	# answers = []
	# for i, response in enumerate(responses.values()):
	# answers.append(f"Answer for question {i+1}:\n {response}")

	# return "\n".join(answers)

	# # Create Gradio interface
	# iface = gr.Interface(
	# fn=extract_answer,
	# inputs="file",
	# outputs="text",
	# title="Question-Answering with Gemstone.ai",
	# description="Upload a PDF containing questions, and get step-by-step answers!",
	# allow_flagging=True,
	# )


	# # Launch the Gradio application
	# iface.launch(share=True, debug=True)