Spaces:

aakash0563
/

Gemini-vision

Sleeping

App Files Files Community

aakash0563 committed on Feb 18

Commit

7d18ab6

•

1 Parent(s): 504d166

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -23

app.py CHANGED Viewed

@@ -1,35 +1,94 @@
 import google.generativeai as genai
 from PIL import Image
-import gradio as gr
 import numpy as np
-import os
-GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-# Now you can use hugging_face_api_key in your code
-genai.configure(api_key=GOOGLE_API_KEY)
-model = genai.GenerativeModel('gemini-pro-vision')
-def process_image_and_text(image, text):
-  # Assuming image is the input from Gradio
-  if text:
-    image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
-    image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
-    response = model.generate_content([text, image])
-    return response.text
-  else:
-    image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
-    image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
-    response = model.generate_content(["Tell me about this image in bulletin format", image])
-    return response.text
 iface = gr.Interface(
-    process_image_and_text,
-    inputs=["image", "textbox"],  # Specify image and text inputs
-    outputs="textbox",          # Specify text output
-    title="Image and Text Processor",  # Set the app title
 )
-iface.launch(debug=True, share=True)  # Launch the Gradio app

+# import google.generativeai as genai
+# from PIL import Image
+# import gradio as gr
+# import numpy as np
+# import os
+# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+# # Now you can use hugging_face_api_key in your code
+# genai.configure(api_key=GOOGLE_API_KEY)
+# model = genai.GenerativeModel('gemini-pro-vision')
+# def process_image_and_text(image, text):
+#   # Assuming image is the input from Gradio
+#   if text:
+#     image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
+#     image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
+#     response = model.generate_content([text, image])
+#     return response.text
+#   else:
+#     image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
+#     image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
+#     response = model.generate_content(["Tell me about this image in bulletin format", image])
+#     return response.text
+# iface = gr.Interface(
+#     process_image_and_text,
+#     inputs=["image", "textbox"],  # Specify image and text inputs
+#     outputs="textbox",          # Specify text output
+#     title="Image and Text Processor",  # Set the app title
+# )
+# iface.launch(debug=True, share=True)  # Launch the Gradio app
+from dotenv import load_dotenv
 import google.generativeai as genai
+import os
+import os
+from pdf2image import convert_from_path
 from PIL import Image
+import pdf2image
 import numpy as np
+genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+from youtube_transcript_api import YouTubeTranscriptApi
+load_dotenv()
+import gradio as gr
+# print(llm.predict("Who is the PM of India?"))
+model = genai.GenerativeModel('gemini-pro-vision')
+def process_image_and_text(images):
+    response = {}
+    for i,image in enumerate(images):
+        # # Assuming image is the input from Gradio
+        # image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
+        # image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
+        response = model.generate_content(["You are act as a tutor Solve all the question in the image in step by step: ", image])
+        response[i] = response.text
+    return response
+def input_pdf_setup(uploaded_pdf):
+    # Convert PDF pages to images
+    images = convert_from_path(uploaded_pdf, dpi=200)
+    return images
+def extract_answer(uploaded_pdf):
+    """Retrieves answers from processed images and presents them clearly."""
+    images = input_pdf_setup(uploaded_pdf)
+    responses = process_image_and_text(images=images)
+    # Present results in a user-friendly format
+    answers = []
+    for i, response in enumerate(responses.values()):
+        answers.append(f"Answer for question {i+1}:\n {response}")
+    return "\n".join(answers)
+# Create Gradio interface
 iface = gr.Interface(
+    fn=extract_answer,
+    inputs="file",
+    outputs="text",
+    title="Question-Answering with Gemstone.ai",
+    description="Upload a PDF containing questions, and get step-by-step answers!",
+    allow_flagging=True,
 )
+# Launch the Gradio application
+iface.launch(share=True, debug=True)