File size: 3,167 Bytes
8b4a847
 
 
 
 
7d18ab6
8b4a847
7d18ab6
8b4a847
7d18ab6
8b4a847
7d18ab6
8b4a847
 
 
 
 
 
 
 
 
 
 
 
 
7d18ab6
 
8b4a847
 
 
 
 
 
7d18ab6
8b4a847
7d18ab6
 
2768c94
8b4a847
 
 
 
 
 
 
 
2768c94
8b4a847
2768c94
8b4a847
504d166
a5372c2
8b4a847
 
 
 
 
 
 
 
 
 
 
 
504d166
8b4a847
 
 
 
504d166
5600f83
 
8b4a847
 
 
 
7d18ab6
8b4a847
 
 
 
5600f83
8b4a847
7d18ab6
8b4a847
 
 
 
 
 
 
 
 
5600f83
7d18ab6
8b4a847
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import google.generativeai as genai
from PIL import Image
import gradio as gr
import numpy as np
import os

# Read the Gemini API key from the environment so it is never hard-coded here.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Hand the key to the google.generativeai client library.
genai.configure(api_key=GOOGLE_API_KEY)

# Module-level handle for the 'gemini-pro-vision' model, created once at import.
model = genai.GenerativeModel(model_name="gemini-pro-vision")
def process_image_and_text(image, text):
    """Ask the Gemini model about an uploaded image.

    Args:
        image: Value of the Gradio "image" input (array-like pixel data), or
            ``None`` when the user submitted without uploading an image.
        text: Prompt typed into the textbox. When it is empty, ``None`` or
            whitespace-only, a default "describe this image" prompt is used.

    Returns:
        The text of the model's response, or a short message asking for an
        image when none was provided.
    """
    # Without this guard a missing upload fails with an AttributeError
    # before the model is ever called.
    if image is None:
        return "Please upload an image."

    default_prompt = "Tell me about this image in bulletin format"
    prompt = text if text and text.strip() else default_prompt

    # np.asarray accepts the array directly; the model call is given a
    # PIL image built from 8-bit pixel data.
    pixels = np.asarray(image)
    picture = Image.fromarray(pixels.astype('uint8'), 'RGB')

    response = model.generate_content([prompt, picture])
    return response.text


# Build the Gradio UI: an image and a textbox go in, a textbox comes out.
iface = gr.Interface(
    fn=process_image_and_text,
    inputs=["image", "textbox"],
    outputs="textbox",
    title="Image and Text Processor",
)

# Start the app with debug mode on and a public share link requested.
iface.launch(share=True, debug=True)



# import google.generativeai as genai
# import os
# import os
# from pdf2image import convert_from_path
# from PIL import Image
# import pdf2image
# import numpy as np
# GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# # Now you can use hugging_face_api_key in your code

# genai.configure(api_key=GOOGLE_API_KEY)


# import gradio as gr
# # print(llm.predict("Who is the PM of India?"))
# model = genai.GenerativeModel('gemini-pro-vision')
# def process_image_and_text(images):
#     response = {}
#     for i,image in enumerate(images):
#         # # Assuming image is the input from Gradio
#         # image_array = np.asarray(image.data)  # Convert memoryview to NumPy array
#         # image = Image.fromarray(image_array.astype('uint8'), 'RGB')  # Now you can use astype
#         response = model.generate_content(["You are act as a tutor Solve all the question in the image in step by step: ", image])
#         response[i] = response.text
#     return response

# def input_pdf_setup(uploaded_pdf):
#     # Convert PDF pages to images
#     images = convert_from_path(uploaded_pdf, dpi=200)
#     return images



# def extract_answer(uploaded_pdf):
#     """Retrieves answers from processed images and presents them clearly."""
#     images = input_pdf_setup(uploaded_pdf)
#     responses = process_image_and_text(images=images)

#     # Present results in a user-friendly format
#     answers = []
#     for i, response in enumerate(responses.values()):
#         answers.append(f"Answer for question {i+1}:\n {response}")

#     return "\n".join(answers)

# # Create Gradio interface
# iface = gr.Interface(
#     fn=extract_answer,
#     inputs="file",
#     outputs="text",
#     title="Question-Answering with Gemstone.ai",
#     description="Upload a PDF containing questions, and get step-by-step answers!",
#     allow_flagging=True,
# )


# # Launch the Gradio application
# iface.launch(share=True, debug=True)