ProgramerSalar committed on
Commit
284dbfe
·
1 Parent(s): 6e89a88
Files changed (2) hide show
  1. app.py +0 -120
  2. gardio.py +120 -0
app.py CHANGED
@@ -1,120 +0,0 @@
1
- import gradio as gr
2
- from PIL import Image
3
- import pytesseract
4
- import json
5
-
6
- import google.generativeai as genai
7
- google_api = 'AIzaSyAMlYqwvuQgekl8nlqc56XTqJVBufszrBU'
8
- genai.configure(api_key=google_api)
9
- from pathlib import Path
10
- # from IPython.display import Markdown
11
-
12
-
13
-
14
-
15
- from PIL import Image
16
- import io
17
-
18
-
19
-
20
-
21
- # Model Configuration
22
- MODEL_CONFIG = {
23
- "temperature": 0.2,
24
- "top_p": 1,
25
- "top_k": 32,
26
- "max_output_tokens": 4096,
27
- }
28
-
29
- ## Safety Settings of Model
30
- safety_settings = [
31
- {
32
- "category": "HARM_CATEGORY_HARASSMENT",
33
- "threshold": "BLOCK_MEDIUM_AND_ABOVE"
34
- },
35
- {
36
- "category": "HARM_CATEGORY_HATE_SPEECH",
37
- "threshold": "BLOCK_MEDIUM_AND_ABOVE"
38
- },
39
- {
40
- "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
41
- "threshold": "BLOCK_MEDIUM_AND_ABOVE"
42
- },
43
- {
44
- "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
45
- "threshold": "BLOCK_MEDIUM_AND_ABOVE"
46
- }
47
- ]
48
-
49
- model = genai.GenerativeModel(model_name='gemini-2.5-flash',
50
- generation_config=MODEL_CONFIG,
51
- safety_settings=safety_settings)
52
-
53
-
54
- def gemini_output(image_path,
55
- system_prompt,
56
- user_prompt):
57
-
58
-
59
- input_prompt = [system_prompt, image_path, user_prompt]
60
- response = model.generate_content(input_prompt)
61
-
62
- return response.text
63
-
64
-
65
-
66
-
67
- custom_css = """
68
- .image_preview {
69
- max-height: 700px; overflow-y: auto !important;
70
- }
71
-
72
- .big-font textarea {
73
- font-size: 20px !important;
74
- }
75
-
76
-
77
- """
78
-
79
- def extract_text(image_path):
80
-
81
- system_prompt = """
82
- You are a specialist in comprehending receipts.
83
- Input images in the form of receipts will be provided to you,
84
- and your task is to respond to questions based on the content of the input image.
85
- """
86
-
87
- user_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "
88
- output = gemini_output(image_path, system_prompt, user_prompt)
89
-
90
-
91
-
92
- output = output.replace("```json", "")
93
- output = output.replace("```", "")
94
-
95
-
96
- print(f">>>>>>> {output}")
97
-
98
-
99
-
100
-
101
- return output
102
-
103
-
104
- # Create the Gradio interface
105
- iface = gr.Interface(
106
- fn=extract_text,
107
- inputs=gr.Image(type="pil", elem_classes=["image_preview"]), # Accept PIL images directly
108
- outputs=gr.Textbox(lines=20,
109
- max_lines=10,
110
- label='Extracted Text',
111
- elem_classes=["big-font"]
112
- ),
113
- title="Text Extraction",
114
- description="Upload an image to extract text",
115
- allow_flagging='never',
116
- css=custom_css,
117
- )
118
-
119
- # Launch the app
120
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
gardio.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ import pytesseract
4
+ import json
5
+
6
+ import google.generativeai as genai
7
# SECURITY: an API key used to be hard-coded on this line and was committed to
# the repository. That key must be treated as compromised and revoked. The key
# is now read from the environment so it never appears in source control.
import os

google_api = os.environ.get("GOOGLE_API_KEY")
if not google_api:
    # Fail fast with an actionable message instead of failing later inside
    # the first model call.
    raise RuntimeError(
        "GOOGLE_API_KEY environment variable is not set; "
        "configure it (for example as a deployment secret) before starting the app."
    )
genai.configure(api_key=google_api)
9
+ from pathlib import Path
10
+ # from IPython.display import Markdown
11
+
12
+
13
+
14
+
15
+ from PIL import Image
16
+ import io
17
+
18
+
19
+
20
+
21
+ # Model Configuration
22
+ MODEL_CONFIG = {
23
+ "temperature": 0.2,
24
+ "top_p": 1,
25
+ "top_k": 32,
26
+ "max_output_tokens": 4096,
27
+ }
28
+
29
+ ## Safety Settings of Model
30
+ safety_settings = [
31
+ {
32
+ "category": "HARM_CATEGORY_HARASSMENT",
33
+ "threshold": "BLOCK_MEDIUM_AND_ABOVE"
34
+ },
35
+ {
36
+ "category": "HARM_CATEGORY_HATE_SPEECH",
37
+ "threshold": "BLOCK_MEDIUM_AND_ABOVE"
38
+ },
39
+ {
40
+ "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
41
+ "threshold": "BLOCK_MEDIUM_AND_ABOVE"
42
+ },
43
+ {
44
+ "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
45
+ "threshold": "BLOCK_MEDIUM_AND_ABOVE"
46
+ }
47
+ ]
48
+
49
+ model = genai.GenerativeModel(model_name='gemini-2.5-flash',
50
+ generation_config=MODEL_CONFIG,
51
+ safety_settings=safety_settings)
52
+
53
+
54
def gemini_output(image_path,
                  system_prompt,
                  user_prompt):
    """Send the prompts and the image to the Gemini model and return its text reply.

    Args:
        image_path: The image to analyse. Despite the name, the value is
            forwarded to the model unchanged; the Gradio interface in this
            file is configured with ``type="pil"``, so it supplies an image
            object rather than a filesystem path.
        system_prompt: Instruction text placed before the image in the request.
        user_prompt: Question or task text placed after the image.

    Returns:
        The text of the model response.

    Raises:
        ValueError: If no image was supplied, or if the response carries no
            text (for example because it was blocked by the safety settings).
    """
    if image_path is None:
        raise ValueError("No image was provided to send to the model.")

    input_prompt = [system_prompt, image_path, user_prompt]
    response = model.generate_content(input_prompt)

    try:
        return response.text
    except ValueError as err:
        # The `.text` accessor raises ValueError when the response has no
        # usable content; re-raise the same exception type with context so
        # the failure is understandable to whoever reads the log.
        feedback = getattr(response, "prompt_feedback", None)
        raise ValueError(
            f"The model returned no text for this image (prompt feedback: {feedback})."
        ) from err
63
+
64
+
65
+
66
+
67
+ custom_css = """
68
+ .image_preview {
69
+ max-height: 700px; overflow-y: auto !important;
70
+ }
71
+
72
+ .big-font textarea {
73
+ font-size: 20px !important;
74
+ }
75
+
76
+
77
+ """
78
+
79
def extract_text(image_path):
    """Ask the model to convert an invoice/receipt image into JSON text.

    Args:
        image_path: The image supplied by the Gradio input component. It is
            ``None`` when the user submits without uploading anything.

    Returns:
        The model's reply as a string, with Markdown code-fence markers
        removed and surrounding whitespace stripped. The reply is not parsed
        or validated as JSON here.

    Raises:
        gr.Error: If no image was uploaded, so the UI shows a readable
            message instead of an opaque library failure.
    """
    if image_path is None:
        raise gr.Error("Please upload an image before submitting.")

    system_prompt = """
    You are a specialist in comprehending receipts.
    Input images in the form of receipts will be provided to you,
    and your task is to respond to questions based on the content of the input image.
    """

    user_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "
    output = gemini_output(image_path, system_prompt, user_prompt)

    # The model usually wraps its answer in a Markdown code fence; remove the
    # fence markers, then strip the blank lines they leave behind so the text
    # box starts directly with the JSON.
    output = output.replace("```json", "").replace("```", "").strip()

    # Echo the cleaned result to stdout so it appears in the application log.
    print(f">>>>>>> {output}")

    return output
102
+
103
+
104
+ # Create the Gradio interface
105
# Single-function Gradio UI: one image in, the extracted text out.
iface = gr.Interface(
    fn=extract_text,
    inputs=gr.Image(type="pil", elem_classes=["image_preview"]),  # Accept PIL images directly
    outputs=gr.Textbox(
        lines=20,
        # max_lines was 10, i.e. smaller than the minimum of 20 rows requested
        # by `lines`; it must be at least as large as `lines` to be meaningful.
        max_lines=40,
        label='Extracted Text',
        elem_classes=["big-font"],
    ),
    title="Text Extraction",
    description="Upload an image to extract text",
    # NOTE(review): newer Gradio releases may expect `flagging_mode` instead
    # of `allow_flagging` - confirm against the installed version.
    allow_flagging='never',
    css=custom_css,
)
118
+
119
+ # Launch the app
120
+ iface.launch()