# Spaces: Sleeping
# import google.generativeai as genai | |
# from PIL import Image | |
# import gradio as gr | |
# import numpy as np | |
# import os | |
# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") | |
# # Now you can use GOOGLE_API_KEY in your code
# genai.configure(api_key=GOOGLE_API_KEY) | |
# model = genai.GenerativeModel('gemini-pro-vision') | |
# def process_image_and_text(image, text): | |
# # Assuming image is the input from Gradio | |
# if text: | |
# image_array = np.asarray(image.data) # Convert memoryview to NumPy array | |
# image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype | |
# response = model.generate_content([text, image]) | |
# return response.text | |
# else: | |
# image_array = np.asarray(image.data) # Convert memoryview to NumPy array | |
# image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype | |
# response = model.generate_content(["Tell me about this image in bulletin format", image]) | |
# return response.text | |
# iface = gr.Interface( | |
# process_image_and_text, | |
# inputs=["image", "textbox"], # Specify image and text inputs | |
# outputs="textbox", # Specify text output | |
# title="Image and Text Processor", # Set the app title | |
# ) | |
# iface.launch(debug=True, share=True) # Launch the Gradio app | |
import os

import google.generativeai as genai
import gradio as gr
import numpy as np
import pdf2image
from dotenv import load_dotenv
from pdf2image import convert_from_path
from PIL import Image
from youtube_transcript_api import YouTubeTranscriptApi

# Populate os.environ from a local .env file (if any) BEFORE the API key is
# read; configuring the client first would silently pass api_key=None whenever
# the key lives only in .env.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Shared Gemini vision model used by process_image_and_text().
model = genai.GenerativeModel('gemini-pro-vision')
def process_image_and_text(
    images,
    prompt="You are act as a tutor Solve all the question in the image in step by step: ",
    generative_model=None,
):
    """Ask the generative model to solve the questions shown on each image.

    One request is sent per image, in iteration order.

    Args:
        images: Iterable of images to send (for example the page images
            produced by ``input_pdf_setup``).
        prompt: Instruction sent together with every image. Defaults to the
            tutor prompt this app has always used.
        generative_model: Object exposing ``generate_content(parts)`` whose
            result has a ``.text`` attribute. Defaults to the module-level
            ``model``.

    Returns:
        dict mapping each image's zero-based position to the reply text.
        Empty when ``images`` is empty.
    """
    if generative_model is None:
        generative_model = model

    answers = {}
    for index, image in enumerate(images):
        # The reply gets its own name: reusing the accumulator's name here
        # would replace the dict with the API reply and break the item
        # assignment on the next line.
        reply = generative_model.generate_content([prompt, image])
        answers[index] = reply.text
    return answers
def input_pdf_setup(uploaded_pdf):
    """Render every page of a PDF as an image at 200 DPI.

    Args:
        uploaded_pdf: Filesystem path of the PDF (``str`` or ``os.PathLike``),
            or an upload object that exposes the path as ``.name`` — some
            Gradio versions hand the ``"file"`` input to the callback as a
            temp-file wrapper rather than a plain path.

    Returns:
        The result of ``convert_from_path`` for that file.
    """
    # Only unwrap non-path objects: pathlib paths also have a ``.name``, but
    # there it is just the final component, not a usable path.
    if not isinstance(uploaded_pdf, (str, os.PathLike)):
        uploaded_pdf = getattr(uploaded_pdf, "name", uploaded_pdf)
    return convert_from_path(uploaded_pdf, dpi=200)
def extract_answer(uploaded_pdf):
    """Run the PDF through the model and format the replies as one string.

    The PDF is converted to page images, each image is sent to the model,
    and every reply is labelled with a 1-based number. Labelled replies are
    joined with newlines.
    """
    page_images = input_pdf_setup(uploaded_pdf)
    replies = process_image_and_text(images=page_images)

    # Numbering follows the order of the mapping's values, starting at 1.
    labelled = [
        f"Answer for question {number}:\n {text}"
        for number, text in enumerate(replies.values(), start=1)
    ]
    return "\n".join(labelled)
# Build the Gradio UI: one file upload in, plain text out.
iface = gr.Interface(
    fn=extract_answer,
    inputs="file",
    outputs="text",
    title="Question-Answering with Gemstone.ai",
    description="Upload a PDF containing questions, and get step-by-step answers!",
    # "manual" is what the deprecated boolean True was mapped to; passing the
    # string directly avoids the deprecation warning.
    allow_flagging="manual",
)

# Start the app with a public share link and debug output enabled.
iface.launch(share=True, debug=True)