Spaces:

aakash0563
/

Gemini-vision

Sleeping

App Files Files Community

Gemini-vision / app.py

aakash0563

Update app.py

7d18ab6 verified 5 months ago

raw history blame

No virus

3.14 kB

	# import google.generativeai as genai
	# from PIL import Image
	# import gradio as gr
	# import numpy as np
	# import os

	# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

	# # GOOGLE_API_KEY now holds the API key read from the environment

	# genai.configure(api_key=GOOGLE_API_KEY)

	# model = genai.GenerativeModel('gemini-pro-vision')
	# def process_image_and_text(image, text):
	# # Assuming image is the input from Gradio
	# if text:
	# image_array = np.asarray(image.data) # Convert memoryview to NumPy array
	# image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype
	# response = model.generate_content([text, image])
	# return response.text
	# else:
	# image_array = np.asarray(image.data) # Convert memoryview to NumPy array
	# image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype
	# response = model.generate_content(["Tell me about this image in bulletin format", image])
	# return response.text


	# iface = gr.Interface(
	# process_image_and_text,
	# inputs=["image", "textbox"], # Specify image and text inputs
	# outputs="textbox", # Specify text output
	# title="Image and Text Processor", # Set the app title
	# )

	# iface.launch(debug=True, share=True) # Launch the Gradio app


	from dotenv import load_dotenv
	import google.generativeai as genai
	import os
	import os
	from pdf2image import convert_from_path
	from PIL import Image
	import pdf2image
	import numpy as np

	# Load variables from a local .env file first, so that GOOGLE_API_KEY is
	# already in the environment when it is read below. Previously
	# genai.configure() ran before load_dotenv(), so a key stored only in .env
	# was never picked up.
	load_dotenv()
	genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
	from youtube_transcript_api import YouTubeTranscriptApi
	import gradio as gr

	# Shared Gemini vision model used by process_image_and_text().
	model = genai.GenerativeModel('gemini-pro-vision')
	def process_image_and_text(images, vision_model=None):
	    """Ask the vision model to solve the questions shown in each image.

	    Args:
	        images: Iterable of page images, as returned by ``input_pdf_setup``.
	            Each image is sent to the model in its own request.
	        vision_model: Optional object exposing ``generate_content(parts)``
	            whose result has a ``.text`` attribute. When omitted, the
	            module-level ``model`` is used.

	    Returns:
	        dict mapping each image's zero-based position in ``images`` to the
	        text of the model's reply for that image. Empty if ``images`` is
	        empty.
	    """
	    if vision_model is None:
	        vision_model = model

	    answers = {}
	    for i, image in enumerate(images):
	        # Keep the API reply in its own name: the original reused one
	        # variable for both the result dict and the reply, so the dict was
	        # overwritten on the first iteration and the item assignment failed.
	        reply = vision_model.generate_content(["You are act as a tutor Solve all the question in the image in step by step: ", image])
	        answers[i] = reply.text
	    return answers

	def input_pdf_setup(uploaded_pdf):
	    """Convert the pages of an uploaded PDF to images.

	    Args:
	        uploaded_pdf: Either a filesystem path (``str`` / ``os.PathLike``) or
	            an upload object that exposes the file's path as ``.name``.
	            NOTE(review): which of the two Gradio's "file" input delivers
	            appears to depend on the Gradio version -- confirm against the
	            version deployed.

	    Returns:
	        The result of ``convert_from_path`` for that file at 200 dpi.

	    Raises:
	        ValueError: If ``uploaded_pdf`` is ``None`` (nothing was uploaded).
	    """
	    if uploaded_pdf is None:
	        raise ValueError("No PDF file was uploaded.")

	    # Check for real paths first: pathlib objects also have a ``.name``
	    # attribute, but it is only the basename, not the full path.
	    if isinstance(uploaded_pdf, (str, os.PathLike)):
	        pdf_path = uploaded_pdf
	    else:
	        pdf_path = uploaded_pdf.name

	    # Convert PDF pages to images
	    return convert_from_path(pdf_path, dpi=200)



	def extract_answer(uploaded_pdf):
	    """Run the uploaded PDF through the model and format the replies.

	    The PDF is converted to images with ``input_pdf_setup``, the images are
	    passed to ``process_image_and_text``, and the resulting replies are
	    numbered from 1 and joined with newlines into a single string.
	    """
	    page_images = input_pdf_setup(uploaded_pdf)
	    replies = process_image_and_text(images=page_images)

	    # Number the replies in the order the mapping yields them.
	    return "\n".join(
	        f"Answer for question {number}:\n {reply}"
	        for number, reply in enumerate(replies.values(), start=1)
	    )

	# Create Gradio interface: one file upload in, one block of text out.
	iface = gr.Interface(
	    fn=extract_answer,
	    inputs="file",
	    outputs="text",
	    title="Question-Answering with Gemstone.ai",
	    description="Upload a PDF containing questions, and get step-by-step answers!",
	    # Gradio takes a string flagging mode here; the boolean form is
	    # deprecated. "manual" is the mode the old ``True`` value was mapped to
	    # (NOTE(review): confirm against the deployed Gradio version).
	    allow_flagging="manual",
	)


	# Launch the Gradio application
	iface.launch(share=True, debug=True)