aakash0563 committed on
Commit
7d18ab6
1 Parent(s): 504d166

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -23
app.py CHANGED
@@ -1,35 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import google.generativeai as genai
 
 
 
2
  from PIL import Image
3
- import gradio as gr
4
  import numpy as np
5
- import os
6
 
7
- GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- # Now you can use hugging_face_api_key in your code
 
 
 
10
 
11
- genai.configure(api_key=GOOGLE_API_KEY)
12
 
13
- model = genai.GenerativeModel('gemini-pro-vision')
14
- def process_image_and_text(image, text):
15
- # Assuming image is the input from Gradio
16
- if text:
17
- image_array = np.asarray(image.data) # Convert memoryview to NumPy array
18
- image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype
19
- response = model.generate_content([text, image])
20
- return response.text
21
- else:
22
- image_array = np.asarray(image.data) # Convert memoryview to NumPy array
23
- image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype
24
- response = model.generate_content(["Tell me about this image in bulletin format", image])
25
- return response.text
26
 
 
 
 
 
 
 
 
 
 
27
 
 
 
 
28
  iface = gr.Interface(
29
- process_image_and_text,
30
- inputs=["image", "textbox"], # Specify image and text inputs
31
- outputs="textbox", # Specify text output
32
- title="Image and Text Processor", # Set the app title
 
 
33
  )
34
 
35
- iface.launch(debug=True, share=True) # Launch the Gradio app
 
 
 
1
+ # import google.generativeai as genai
2
+ # from PIL import Image
3
+ # import gradio as gr
4
+ # import numpy as np
5
+ # import os
6
+
7
+ # GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
8
+
9
+ # # Now you can use hugging_face_api_key in your code
10
+
11
+ # genai.configure(api_key=GOOGLE_API_KEY)
12
+
13
+ # model = genai.GenerativeModel('gemini-pro-vision')
14
+ # def process_image_and_text(image, text):
15
+ # # Assuming image is the input from Gradio
16
+ # if text:
17
+ # image_array = np.asarray(image.data) # Convert memoryview to NumPy array
18
+ # image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype
19
+ # response = model.generate_content([text, image])
20
+ # return response.text
21
+ # else:
22
+ # image_array = np.asarray(image.data) # Convert memoryview to NumPy array
23
+ # image = Image.fromarray(image_array.astype('uint8'), 'RGB') # Now you can use astype
24
+ # response = model.generate_content(["Tell me about this image in bulletin format", image])
25
+ # return response.text
26
+
27
+
28
+ # iface = gr.Interface(
29
+ # process_image_and_text,
30
+ # inputs=["image", "textbox"], # Specify image and text inputs
31
+ # outputs="textbox", # Specify text output
32
+ # title="Image and Text Processor", # Set the app title
33
+ # )
34
+
35
+ # iface.launch(debug=True, share=True) # Launch the Gradio app
36
+
37
+
38
import os

import google.generativeai as genai
import gradio as gr
import numpy as np
import pdf2image
from dotenv import load_dotenv
from pdf2image import convert_from_path
from PIL import Image
from youtube_transcript_api import YouTubeTranscriptApi

# Load variables from a local .env file BEFORE reading GOOGLE_API_KEY.
# Previously the key was read first, so a key defined only in .env was seen
# as None when the client was configured. load_dotenv() does not override
# variables that are already set in the environment.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Gemini vision model instance used by process_image_and_text().
model = genai.GenerativeModel('gemini-pro-vision')
53
def process_image_and_text(images):
    """Ask the Gemini model to solve the questions shown in each image.

    Args:
        images: Iterable of images, one per page. Presumably PIL images as
            produced by pdf2image -- confirm against the caller.

    Returns:
        dict mapping the zero-based position of each image to the text of the
        model's reply for that image. Empty dict when ``images`` is empty.
    """
    prompt = "You are act as a tutor Solve all the question in the image in step by step: "

    # Keep the accumulator and the per-image model reply under different
    # names. The earlier code reused ``response`` for both, so the dict was
    # replaced by the model reply and the item assignment failed.
    answers = {}
    for i, image in enumerate(images):
        reply = model.generate_content([prompt, image])
        answers[i] = reply.text
    return answers
62
 
63
def input_pdf_setup(uploaded_pdf):
    """Convert every page of a PDF into an image rendered at 200 DPI.

    Args:
        uploaded_pdf: Either a filesystem path (``str`` / ``os.PathLike``) or
            an uploaded-file object exposing the path in its ``.name``
            attribute (as some Gradio versions pass for a "file" input --
            TODO confirm against the installed Gradio version).

    Returns:
        The list of page images returned by ``convert_from_path``.
    """
    if isinstance(uploaded_pdf, (str, os.PathLike)):
        # Already a path: pass through untouched (a Path's ``.name`` would
        # be only the final component, so it must not be used here).
        pdf_path = uploaded_pdf
    else:
        # Temp-file style wrapper: the on-disk location lives in ``.name``.
        pdf_path = getattr(uploaded_pdf, "name", uploaded_pdf)

    return convert_from_path(pdf_path, dpi=200)
67
 
 
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
def extract_answer(uploaded_pdf):
    """Turn an uploaded PDF into one formatted string of model answers.

    The PDF is converted to images with ``input_pdf_setup`` and the images
    are passed to ``process_image_and_text``. Each returned answer is
    labelled with a 1-based counter and the entries are joined with newlines.

    Args:
        uploaded_pdf: The uploaded PDF, forwarded as-is to ``input_pdf_setup``.

    Returns:
        A single string containing every labelled answer.
    """
    page_images = input_pdf_setup(uploaded_pdf)
    page_answers = process_image_and_text(images=page_images)

    # Number the answers from 1 in the order the mapping yields them.
    return "\n".join(
        f"Answer for question {number}:\n {text}"
        for number, text in enumerate(page_answers.values(), start=1)
    )
81
+
82
# Gradio front end: a single file upload in, plain text out, with
# extract_answer doing the work for every submission.
iface = gr.Interface(
    title="Question-Answering with Gemstone.ai",
    description="Upload a PDF containing questions, and get step-by-step answers!",
    inputs="file",
    outputs="text",
    fn=extract_answer,
    allow_flagging=True,
)

# Start the app with debug output enabled and a public share link requested.
iface.launch(debug=True, share=True)