Fix Merge Conflict: DepremOCR (PaddleOCR optimized inference pipeline)

#7
by Goodsea - opened
Files changed (2) hide show
  1. app.py +68 -51
  2. requirements.txt +11 -2
app.py CHANGED
@@ -1,12 +1,12 @@
1
  import gradio as gr
2
- from easyocr import Reader
3
- from PIL import Image
4
- import io
5
  import json
6
  import csv
7
  import openai
8
  import ast
9
  import os
 
 
10
  from deta import Deta
11
 
12
 
@@ -18,38 +18,39 @@ import os
18
  import openai
19
 
20
 
21
-
22
  class OpenAI_API:
23
  def __init__(self):
24
- self.openai_api_key = ''
25
-
26
  def single_request(self, address_text):
27
-
28
  openai.api_type = "azure"
29
  openai.api_base = "https://damlaopenai.openai.azure.com/"
30
  openai.api_version = "2022-12-01"
31
  openai.api_key = os.getenv("API_KEY")
32
-
33
  response = openai.Completion.create(
34
- engine="Davinci-003",
35
- prompt=address_text,
36
- temperature=0.9,
37
- max_tokens=256,
38
- top_p=1.0,
39
- n=1,
40
- logprobs=0,
41
- echo=False,
42
- stop=None,
43
- frequency_penalty=0,
44
- presence_penalty=0,
45
- best_of=1)
 
46
 
47
  return response
48
 
 
49
  ########################
50
 
51
- openai.api_key = os.getenv('API_KEY')
52
- reader = Reader(["tr"])
53
 
54
 
55
  def get_parsed_address(input_img):
@@ -65,7 +66,8 @@ def preprocess_img(inp_image):
65
 
66
 
67
  def get_text(input_img):
68
- result = reader.readtext(input_img, detail=0)
 
69
  return " ".join(result)
70
 
71
 
@@ -83,9 +85,10 @@ def get_json(mahalle, il, sokak, apartman):
83
  dump = json.dumps(adres, indent=4, ensure_ascii=False)
84
  return dump
85
 
 
86
  def write_db(data_dict):
87
  # 2) initialize with a project key
88
- deta_key = os.getenv('DETA_KEY')
89
  deta = Deta(deta_key)
90
 
91
  # 3) create and use as many DBs as you want!
@@ -98,16 +101,17 @@ def text_dict(input):
98
  write_db(eval_result)
99
 
100
  return (
101
- str(eval_result['city']),
102
- str(eval_result['distinct']),
103
- str(eval_result['neighbourhood']),
104
- str(eval_result['street']),
105
- str(eval_result['address']),
106
- str(eval_result['tel']),
107
- str(eval_result['name_surname']),
108
- str(eval_result['no']),
109
  )
110
-
 
111
  def openai_response(ocr_input):
112
  prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
113
  plain text input and especially from emergency text that carries address information, your inputs can be text
@@ -128,28 +132,31 @@ def openai_response(ocr_input):
128
  resp = eval(resp.replace("'{", "{").replace("}'", "}"))
129
  resp["input"] = ocr_input
130
  dict_keys = [
131
- 'city',
132
- 'distinct',
133
- 'neighbourhood',
134
- 'street',
135
- 'no',
136
- 'tel',
137
- 'name_surname',
138
- 'address',
139
- 'input',
140
  ]
141
  for key in dict_keys:
142
  if key not in resp.keys():
143
- resp[key] = ''
144
  return resp
145
 
146
 
147
  with gr.Blocks() as demo:
148
  gr.Markdown(
149
- """
150
  # Enkaz Bildirme Uygulaması
151
- """)
152
- gr.Markdown("Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın.")
 
 
 
153
  with gr.Row():
154
  img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
155
  ocr_result = gr.Textbox(label="Metin yükleyin 👇 ")
@@ -170,13 +177,23 @@ with gr.Blocks() as demo:
170
  with gr.Row():
171
  no = gr.Textbox(label="Kapı No")
172
 
 
 
 
 
 
 
173
 
174
- submit_button.click(get_parsed_address, inputs = img_area, outputs = open_api_text, api_name="upload_image")
175
-
176
- ocr_result.change(openai_response, ocr_result, open_api_text, api_name="upload-text")
177
 
178
- open_api_text.change(text_dict, open_api_text, [city, distinct, neighbourhood, street, address, tel, name_surname, no])
 
 
 
 
179
 
180
 
181
  if __name__ == "__main__":
182
- demo.launch()
 
1
  import gradio as gr
2
+ from deprem_ocr.ocr import DepremOCR
 
 
3
  import json
4
  import csv
5
  import openai
6
  import ast
7
  import os
8
+ import cv2
9
+ import numpy as np
10
  from deta import Deta
11
 
12
 
 
18
  import openai
19
 
20
 
 
21
  class OpenAI_API:
22
  def __init__(self):
23
+ self.openai_api_key = ""
24
+
25
  def single_request(self, address_text):
26
+
27
  openai.api_type = "azure"
28
  openai.api_base = "https://damlaopenai.openai.azure.com/"
29
  openai.api_version = "2022-12-01"
30
  openai.api_key = os.getenv("API_KEY")
31
+
32
  response = openai.Completion.create(
33
+ engine="Davinci-003",
34
+ prompt=address_text,
35
+ temperature=0.9,
36
+ max_tokens=256,
37
+ top_p=1.0,
38
+ n=1,
39
+ logprobs=0,
40
+ echo=False,
41
+ stop=None,
42
+ frequency_penalty=0,
43
+ presence_penalty=0,
44
+ best_of=1,
45
+ )
46
 
47
  return response
48
 
49
+
50
  ########################
51
 
52
+ openai.api_key = os.getenv("API_KEY")
53
+ depremOCR = DepremOCR()
54
 
55
 
56
  def get_parsed_address(input_img):
 
66
 
67
 
68
  def get_text(input_img):
69
+ result = depremOCR.apply_ocr(np.array(input_img))
70
+ print(result)
71
  return " ".join(result)
72
 
73
 
 
85
  dump = json.dumps(adres, indent=4, ensure_ascii=False)
86
  return dump
87
 
88
+
89
  def write_db(data_dict):
90
  # 2) initialize with a project key
91
+ deta_key = os.getenv("DETA_KEY")
92
  deta = Deta(deta_key)
93
 
94
  # 3) create and use as many DBs as you want!
 
101
  write_db(eval_result)
102
 
103
  return (
104
+ str(eval_result["city"]),
105
+ str(eval_result["distinct"]),
106
+ str(eval_result["neighbourhood"]),
107
+ str(eval_result["street"]),
108
+ str(eval_result["address"]),
109
+ str(eval_result["tel"]),
110
+ str(eval_result["name_surname"]),
111
+ str(eval_result["no"]),
112
  )
113
+
114
+
115
  def openai_response(ocr_input):
116
  prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
117
  plain text input and especially from emergency text that carries address information, your inputs can be text
 
132
  resp = eval(resp.replace("'{", "{").replace("}'", "}"))
133
  resp["input"] = ocr_input
134
  dict_keys = [
135
+ "city",
136
+ "distinct",
137
+ "neighbourhood",
138
+ "street",
139
+ "no",
140
+ "tel",
141
+ "name_surname",
142
+ "address",
143
+ "input",
144
  ]
145
  for key in dict_keys:
146
  if key not in resp.keys():
147
+ resp[key] = ""
148
  return resp
149
 
150
 
151
  with gr.Blocks() as demo:
152
  gr.Markdown(
153
+ """
154
  # Enkaz Bildirme Uygulaması
155
+ """
156
+ )
157
+ gr.Markdown(
158
+ "Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın."
159
+ )
160
  with gr.Row():
161
  img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
162
  ocr_result = gr.Textbox(label="Metin yükleyin 👇 ")
 
177
  with gr.Row():
178
  no = gr.Textbox(label="Kapı No")
179
 
180
+ submit_button.click(
181
+ get_parsed_address,
182
+ inputs=img_area,
183
+ outputs=open_api_text,
184
+ api_name="upload_image",
185
+ )
186
 
187
+ ocr_result.change(
188
+ openai_response, ocr_result, open_api_text, api_name="upload-text"
189
+ )
190
 
191
+ open_api_text.change(
192
+ text_dict,
193
+ open_api_text,
194
+ [city, distinct, neighbourhood, street, address, tel, name_surname, no],
195
+ )
196
 
197
 
198
  if __name__ == "__main__":
199
+ demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,14 @@
 
 
 
 
 
 
 
 
 
 
1
  openai
2
  Pillow
3
- easyocr
4
  gradio
5
- deta
 
1
+ paddlepaddle
2
+ opencv-python
3
+ Pillow
4
+ numpy==1.23.3
5
+ pandas
6
+ imutils
7
+ Cython
8
+ imgaug
9
+ pyclipper
10
+ deprem_ocr
11
  openai
12
  Pillow
 
13
  gradio
14
+ deta