"""Gradio app that sends an uploaded image plus an optional prompt to Gemini."""

import os

import google.generativeai as genai
import gradio as gr
import numpy as np
from PIL import Image

# The Gemini API key is read from the environment so it never lives in source.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

model = genai.GenerativeModel('gemini-pro-vision')

# Prompt used when the user leaves the text box empty.
DEFAULT_PROMPT = "Tell me about this image in bulletin format"


def _to_rgb_image(image):
    """Return *image* (the value of the Gradio "image" input) as an RGB PIL image.

    Raises:
        gr.Error: if no image was supplied, so the UI shows a readable message
            instead of an opaque attribute error.
    """
    if image is None:
        raise gr.Error("Please upload an image.")
    if isinstance(image, Image.Image):
        return image.convert("RGB")
    pixels = np.asarray(image).astype("uint8")
    # Let Pillow infer the mode from the array shape, then normalise to RGB so
    # grayscale / RGBA inputs are handled instead of being misread as RGB.
    return Image.fromarray(pixels).convert("RGB")


def process_image_and_text(image, text):
    """Ask the Gemini vision model about *image* and return its text reply.

    Args:
        image: Value of the Gradio "image" input.
        text: Optional prompt from the textbox. When empty or whitespace-only,
            ``DEFAULT_PROMPT`` is sent instead.

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        gr.Error: if no image was supplied.
    """
    prompt = text if text and text.strip() else DEFAULT_PROMPT
    response = model.generate_content([prompt, _to_rgb_image(image)])
    return response.text


iface = gr.Interface(
    process_image_and_text,
    inputs=["image", "textbox"],  # Specify image and text inputs
    outputs="textbox",  # Specify text output
    title="Image and Text Processor",  # Set the app title
)

iface.launch(debug=True, share=True)  # Launch the Gradio app

# ---------------------------------------------------------------------------
# Commented-out alternative app (PDF question answering) kept from the
# original file. NOTE: as written, the loop rebinds `response` (the result
# dict) to the API response, so it must be fixed before being re-enabled.
# ---------------------------------------------------------------------------
# import google.generativeai as genai
# import os
# import os
# from pdf2image import convert_from_path
# from PIL import Image
# import pdf2image
# import numpy as np
#
# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# # Now you can use hugging_face_api_key in your code
# genai.configure(api_key=GOOGLE_API_KEY)
# import gradio as gr
# # print(llm.predict("Who is the PM of India?"))
# model = genai.GenerativeModel('gemini-pro-vision')
#
# def process_image_and_text(images):
#     response = {}
#     for i,image in enumerate(images):
#         # # Assuming image is the input from Gradio
#         # image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
#         # image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
#         response = model.generate_content(["You are act as a tutor Solve all the question in the image in step by step: ", image])
#         response[i] = response.text
#     return response
#
# def input_pdf_setup(uploaded_pdf):
#     # Convert PDF pages to images
#     images = convert_from_path(uploaded_pdf, dpi=200)
#     return images
#
# def extract_answer(uploaded_pdf):
#     """Retrieves answers from processed images and presents them clearly."""
#     images = input_pdf_setup(uploaded_pdf)
#     responses = process_image_and_text(images=images)
#     # Present results in a user-friendly format
#     answers = []
#     for i, response in enumerate(responses.values()):
#         answers.append(f"Answer for question {i+1}:\n {response}")
#     return "\n".join(answers)
#
# # Create Gradio interface
# iface = gr.Interface(
#     fn=extract_answer,
#     inputs="file",
#     outputs="text",
#     title="Question-Answering with Gemstone.ai",
#     description="Upload a PDF containing questions, and get step-by-step answers!",
#     allow_flagging=True,
# )
#
# # Launch the Gradio application
# iface.launch(share=True, debug=True)