File size: 2,675 Bytes
b4a89a6
 
 
8717c3c
 
 
 
 
 
b45524c
8717c3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4a89a6
 
41b8aa3
088172b
41b8aa3
3c50bd4
b4a89a6
 
 
3c50bd4
 
b4a89a6
 
8717c3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4a89a6
 
 
 
41ad755
b4a89a6
 
 
 
 
 
 
 
3c50bd4
b4a89a6
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import gradio as gr
from PIL import Image
import pytesseract
import json 

import google.generativeai as genai 
import os

# Read the Gemini API key from the environment instead of embedding it in the
# source file. Set GOOGLE_API_KEY (for example as a deployment secret) before
# starting the app. If the variable is unset, google_api is None and
# authentication is left to the client library's own handling.
# NOTE(review): a key was previously committed on this line; it remains in the
# file history and should be revoked.
google_api = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=google_api)
from pathlib import Path
# from IPython.display import Markdown




from PIL import Image
import io 




# Generation parameters handed to the Gemini model.
MODEL_CONFIG = dict(
    temperature=0.2,
    top_p=1,
    top_k=32,
    max_output_tokens=4096,
)

# Safety settings: each listed harm category uses the same blocking threshold.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Module-level Gemini model instance, built from the generation parameters and
# safety settings defined above and used by gemini_output().
model = genai.GenerativeModel(
    model_name="gemini-2.5-flash",
    safety_settings=safety_settings,
    generation_config=MODEL_CONFIG,
)


def gemini_output(image_path, system_prompt, user_prompt):
    """Send a system prompt, an image and a user prompt to the model.

    The three arguments are passed to ``model.generate_content`` as one list,
    in the order system prompt, image, user prompt.

    Args:
        image_path: The image part of the request. NOTE(review): despite the
            name, the caller in this file forwards whatever the Gradio image
            input supplies (configured with ``type="pil"``), presumably a PIL
            image rather than a path; confirm.
        system_prompt: Instruction text placed before the image.
        user_prompt: Request text placed after the image.

    Returns:
        The ``text`` attribute of the model's response.
    """
    reply = model.generate_content([system_prompt, image_path, user_prompt])
    return reply.text




# CSS handed to the Gradio interface via its `css` argument. The
# `image_preview` class caps the image input at 700px high with vertical
# scrolling, and `big-font` enlarges the text inside the output textarea.
custom_css = """
    .image_preview {
        max-height: 700px; overflow-y: auto !important;
    }

    .big-font textarea {
        font-size: 20px !important;
    }

    
"""

def extract_text(image_path):
    """Ask the Gemini model to convert an invoice/receipt image into JSON text.

    Args:
        image_path: The uploaded image as supplied by the Gradio input
            component. The interface in this file uses ``type="pil"``, so
            this is an image object rather than a path (the name is kept for
            compatibility). ``None`` when nothing was uploaded.

    Returns:
        The model's reply with Markdown code-fence markers removed and
        surrounding whitespace trimmed, or a short notice when no image was
        supplied.
    """
    # A submit without an upload delivers None; return a readable notice
    # instead of forwarding None to the model and failing inside the request.
    if image_path is None:
        return "No image provided. Please upload an invoice or receipt image."

    system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """

    user_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "
    output = gemini_output(image_path, system_prompt, user_prompt)

    # The model usually wraps its answer in a ```json ... ``` fence. Drop the
    # fence markers, then trim the blank lines they leave behind so the
    # textbox starts directly with the JSON.
    output = output.replace("```json", "").replace("```", "").strip()

    # Echo the cleaned reply to stdout for debugging.
    print(f">>>>>>> {output}")

    return output


# Create the Gradio interface: one image input, one text output, wired to
# extract_text.
iface = gr.Interface(
    fn=extract_text,
    # type="pil" makes Gradio hand the callback an image object directly.
    inputs=gr.Image(type="pil", elem_classes=["image_preview"]),
    outputs=gr.Textbox(
        lines=20,
        # max_lines must not be smaller than lines; the earlier value of 10
        # contradicted the 20-row minimum set above.
        max_lines=20,
        label='Extracted Text',
        elem_classes=["big-font"],
    ),
    title="Text Extraction",
    description="Upload an image to extract text",
    allow_flagging='never',
    css=custom_css,
)

# Launch the app only when this file is executed as a script, so importing the
# module (for tests or reuse of extract_text) does not start a web server.
if __name__ == "__main__":
    iface.launch()