"""Gradio app: upload a PDF of questions and get step-by-step answers.

Each PDF page is rendered to an image, sent to a Gemini vision model together
with a tutoring prompt, and the per-page answers are joined into one text
block shown in the Gradio UI.
"""

import os

import google.generativeai as genai
import gradio as gr
import numpy as np
import pdf2image
from dotenv import load_dotenv
from pdf2image import convert_from_path
from PIL import Image
from youtube_transcript_api import YouTubeTranscriptApi

# Load variables from a local .env file *before* reading the API key;
# otherwise a key that only lives in .env is seen as None by configure().
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# NOTE(review): confirm this model id is still served by the API.
model = genai.GenerativeModel('gemini-pro-vision')

# Prompt sent with every page image (runtime text; kept verbatim).
_TUTOR_PROMPT = (
    "You are act as a tutor Solve all the question in the image in step by step: "
)


def process_image_and_text(images) -> dict:
    """Ask the model to solve the questions shown on each image.

    Args:
        images: Iterable of page images, in page order.

    Returns:
        Dict mapping the zero-based position of each image to the text of the
        model's reply for that image.
    """
    answers = {}
    for index, image in enumerate(images):
        # Keep the API result in its own name: reusing the accumulator's name
        # here would discard the dict and break the item assignment below.
        result = model.generate_content([_TUTOR_PROMPT, image])
        answers[index] = result.text
    return answers


def input_pdf_setup(uploaded_pdf) -> list:
    """Render every page of the uploaded PDF to an image at 200 DPI.

    Args:
        uploaded_pdf: Either a filesystem path (``str`` / ``os.PathLike``) or
            a file-like object exposing the path as ``.name``.
            NOTE(review): which of the two Gradio's "file" input delivers is
            assumed to depend on the installed Gradio version; confirm.

    Returns:
        The page images produced by ``convert_from_path``.
    """
    if isinstance(uploaded_pdf, (str, os.PathLike)):
        pdf_path = uploaded_pdf
    else:
        # File-like upload wrapper: the on-disk location is in ``.name``.
        pdf_path = uploaded_pdf.name
    return convert_from_path(pdf_path, dpi=200)


def extract_answer(uploaded_pdf) -> str:
    """Retrieve answers for every page of the PDF and format them as text.

    Args:
        uploaded_pdf: The upload as received from the Gradio "file" input, or
            ``None`` when nothing was uploaded.

    Returns:
        One "Answer for question N" section per page, separated by newlines,
        or a short prompt to upload a file when no upload was given.
    """
    if uploaded_pdf is None:
        # Submitting without a file should not surface a traceback.
        return "Please upload a PDF file."

    images = input_pdf_setup(uploaded_pdf)
    responses = process_image_and_text(images=images)

    # Present results in a user-friendly format.
    answers = [
        f"Answer for question {i+1}:\n {response}"
        for i, response in enumerate(responses.values())
    ]
    return "\n".join(answers)


# Create Gradio interface
iface = gr.Interface(
    fn=extract_answer,
    inputs="file",
    outputs="text",
    title="Question-Answering with Gemstone.ai",
    description="Upload a PDF containing questions, and get step-by-step answers!",
    allow_flagging=True,
)

# Launch the Gradio application
iface.launch(share=True, debug=True)