DepremOCR (PaddleOCR optimized inference pipeline)

#6
by Goodsea - opened
Files changed (8) hide show
  1. .gitattributes +34 -0
  2. .gitignore +0 -162
  3. README.md +2 -2
  4. app.py +131 -85
  5. db_utils.py +0 -41
  6. openai_api.py +0 -31
  7. requirements.txt +11 -3
  8. utils.py +0 -53
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore DELETED
@@ -1,162 +0,0 @@
1
- # Byte-compiled / optimized / DLL files
2
- __pycache__/
3
- *.py[cod]
4
- *$py.class
5
-
6
- # C extensions
7
- *.so
8
-
9
- # Distribution / packaging
10
- .Python
11
- build/
12
- develop-eggs/
13
- dist/
14
- downloads/
15
- eggs/
16
- .eggs/
17
- lib/
18
- lib64/
19
- parts/
20
- sdist/
21
- var/
22
- wheels/
23
- share/python-wheels/
24
- *.egg-info/
25
- .installed.cfg
26
- *.egg
27
- MANIFEST
28
-
29
- # PyInstaller
30
- # Usually these files are written by a python script from a template
31
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
- *.manifest
33
- *.spec
34
-
35
- # Installer logs
36
- pip-log.txt
37
- pip-delete-this-directory.txt
38
-
39
- # Unit test / coverage reports
40
- htmlcov/
41
- .tox/
42
- .nox/
43
- .coverage
44
- .coverage.*
45
- .cache
46
- nosetests.xml
47
- coverage.xml
48
- *.cover
49
- *.py,cover
50
- .hypothesis/
51
- .pytest_cache/
52
- cover/
53
-
54
- # Translations
55
- *.mo
56
- *.pot
57
-
58
- # Django stuff:
59
- *.log
60
- local_settings.py
61
- db.sqlite3
62
- db.sqlite3-journal
63
-
64
- # Flask stuff:
65
- instance/
66
- .webassets-cache
67
-
68
- # Scrapy stuff:
69
- .scrapy
70
-
71
- # Sphinx documentation
72
- docs/_build/
73
-
74
- # PyBuilder
75
- .pybuilder/
76
- target/
77
-
78
- # Jupyter Notebook
79
- .ipynb_checkpoints
80
-
81
- # IPython
82
- profile_default/
83
- ipython_config.py
84
-
85
- # pyenv
86
- # For a library or package, you might want to ignore these files since the code is
87
- # intended to run in multiple environments; otherwise, check them in:
88
- # .python-version
89
-
90
- # pipenv
91
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
- # install all needed dependencies.
95
- #Pipfile.lock
96
-
97
- # poetry
98
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
- # This is especially recommended for binary packages to ensure reproducibility, and is more
100
- # commonly ignored for libraries.
101
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
- #poetry.lock
103
-
104
- # pdm
105
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
- #pdm.lock
107
- # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
- # in version control.
109
- # https://pdm.fming.dev/#use-with-ide
110
- .pdm.toml
111
-
112
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
- __pypackages__/
114
-
115
- # Celery stuff
116
- celerybeat-schedule
117
- celerybeat.pid
118
-
119
- # SageMath parsed files
120
- *.sage.py
121
-
122
- # Environments
123
- .env
124
- .venv
125
- env/
126
- venv/
127
- ENV/
128
- env.bak/
129
- venv.bak/
130
-
131
- # Spyder project settings
132
- .spyderproject
133
- .spyproject
134
-
135
- # Rope project settings
136
- .ropeproject
137
-
138
- # mkdocs documentation
139
- /site
140
-
141
- # mypy
142
- .mypy_cache/
143
- .dmypy.json
144
- dmypy.json
145
-
146
- # Pyre type checker
147
- .pyre/
148
-
149
- # pytype static type analyzer
150
- .pytype/
151
-
152
- # Cython debug symbols
153
- cython_debug/
154
-
155
- # PyCharm
156
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
- # and can be added to the global gitignore or merged into this file. For a more nuclear
159
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
- #.idea/
161
-
162
- .DS_Store
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,12 +1,12 @@
1
  ---
2
- title: Deprem OCR
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.17.0
8
  app_file: app.py
9
- pinned: true
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Deprem Ocr 2
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.17.0
8
  app_file: app.py
9
+ pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,81 +1,153 @@
1
- from PIL import ImageFilter, Image
2
- from easyocr import Reader
3
  import gradio as gr
4
- import numpy as np
 
 
5
  import openai
6
  import ast
7
- from transformers import pipeline
8
  import os
 
 
 
9
 
10
- from openai_api import OpenAI_API
11
- import utils
12
 
13
- openai.api_key = os.getenv("API_KEY")
14
- reader = Reader(["tr"])
 
15
 
 
 
16
 
17
- def get_text(input_img):
18
- img = Image.fromarray(input_img)
19
- detailed = np.asarray(img.filter(ImageFilter.DETAIL))
20
- result = reader.readtext(detailed, detail=0, paragraph=True)
21
- return " ".join(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
- # Submit button
25
  def get_parsed_address(input_img):
26
 
27
  address_full_text = get_text(input_img)
28
- return ner_response(address_full_text)
29
 
30
 
31
- def save_deta_db(input):
32
- eval_result = ast.literal_eval(input)
33
- utils.write_db(eval_result)
34
- return
35
 
36
 
37
- def update_component():
38
- return gr.update(value="Gönderildi, teşekkürler.", visible=True)
 
 
 
 
 
 
 
 
 
 
 
39
 
40
 
41
- def clear_textbox(value):
42
- return gr.update(value="")
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
 
45
  def text_dict(input):
46
  eval_result = ast.literal_eval(input)
 
 
47
  return (
48
- str(eval_result["il"]),
49
- str(eval_result["ilce"]),
50
- str(eval_result["mahalle"]),
51
- str(eval_result["sokak"]),
52
- str(eval_result["Apartman/site"]),
 
 
53
  str(eval_result["no"]),
54
- str(eval_result["ad-soyad"]),
55
- str(eval_result["dis kapi no"]),
56
  )
57
 
58
 
59
- def ner_response(ocr_input):
60
-
61
- ner_pipe = pipeline("token-classification","deprem-ml/deprem-ner", aggregation_strategy="first")
62
- predictions = ner_pipe(ocr_input)
63
- resp = {}
 
 
 
 
 
 
 
64
 
65
- for item in predictions:
66
- print(item)
67
- key = item["entity_group"]
68
- resp[key] = item["word"]
69
-
70
  resp["input"] = ocr_input
71
- dict_keys = ["il", "ilce", "mahalle", "sokak", "Apartman/site", "no", "ad-soyad", "dis kapi no"]
 
 
 
 
 
 
 
 
 
 
72
  for key in dict_keys:
73
  if key not in resp.keys():
74
  resp[key] = ""
75
  return resp
76
 
77
 
78
- # User Interface
79
  with gr.Blocks() as demo:
80
  gr.Markdown(
81
  """
@@ -86,68 +158,42 @@ with gr.Blocks() as demo:
86
  "Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın."
87
  )
88
  with gr.Row():
89
- with gr.Column():
90
- img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
91
- img_area_button = gr.Button(value="Görüntüyü İşle", label="Submit")
92
-
93
- with gr.Column():
94
- text_area = gr.Textbox(label="Metin yükleyin 👇 ", lines=8)
95
- text_area_button = gr.Button(value="Metni Yükle", label="Submit")
96
-
97
  open_api_text = gr.Textbox(label="Tam Adres")
98
-
99
  with gr.Column():
100
  with gr.Row():
101
- il = gr.Textbox(label="İl", interactive=True, show_progress=False)
102
- ilce = gr.Textbox(label="İlçe", interactive=True, show_progress=False)
103
  with gr.Row():
104
- mahalle = gr.Textbox(
105
- label="Mahalle", interactive=True, show_progress=False
106
- )
107
- sokak = gr.Textbox(
108
- label="Sokak/Cadde/Bulvar", interactive=True, show_progress=False
109
- )
110
  with gr.Row():
111
- no = gr.Textbox(label="Telefon", interactive=True, show_progress=False)
112
  with gr.Row():
113
- ad_soyad = gr.Textbox(
114
- label="İsim Soyisim", interactive=True, show_progress=False
115
- )
116
- apartman = gr.Textbox(label="apartman", interactive=True, show_progress=False)
117
  with gr.Row():
118
- dis_kapi_no = gr.Textbox(label="Kapı No", interactive=True, show_progress=False)
119
 
120
- img_area_button.click(
121
  get_parsed_address,
122
  inputs=img_area,
123
  outputs=open_api_text,
124
- api_name="upload-image",
125
  )
126
 
127
- text_area_button.click(
128
- ner_response, text_area, open_api_text, api_name="upload-text"
129
  )
130
 
131
-
132
  open_api_text.change(
133
  text_dict,
134
  open_api_text,
135
- [il, ilce, mahalle, sokak, no, apartman, ad_soyad, dis_kapi_no],
136
- )
137
- ocr_button = gr.Button(value="Sadece OCR kullan")
138
- ocr_button.click(
139
- get_text,
140
- inputs=img_area,
141
- outputs=text_area,
142
- api_name="get-ocr-output",
143
  )
144
- submit_button = gr.Button(value="Veriyi Birimlere Yolla")
145
- submit_button.click(save_deta_db, open_api_text)
146
- done_text = gr.Textbox(label="Done", value="Not Done", visible=False)
147
- submit_button.click(update_component, outputs=done_text)
148
- for txt in [il, ilce, mahalle, sokak, apartman, no, ad_soyad, dis_kapi_no]:
149
- submit_button.click(fn=clear_textbox, inputs=txt, outputs=txt)
150
 
151
 
152
  if __name__ == "__main__":
153
- demo.launch()
 
 
 
1
  import gradio as gr
2
+ from deprem_ocr.ocr import DepremOCR
3
+ import json
4
+ import csv
5
  import openai
6
  import ast
 
7
  import os
8
+ import cv2
9
+ import numpy as np
10
+ from deta import Deta
11
 
 
 
12
 
13
+ ######################
14
+ import requests
15
+ import json
16
 
17
+ import os
18
+ import openai
19
 
20
+
21
class OpenAI_API:
    """Thin wrapper around the Azure-hosted OpenAI text-completion endpoint."""

    def __init__(self):
        # Placeholder only: the key that is actually used is read from the
        # API_KEY environment variable inside single_request().
        self.openai_api_key = ""

    def single_request(self, address_text):
        """Send ``address_text`` as a completion prompt and return the raw response.

        Args:
            address_text: Prompt text, forwarded unchanged as ``prompt``.

        Returns:
            The object returned by ``openai.Completion.create``.
        """
        # These are module-level settings on ``openai``; they are re-applied on
        # every call and affect every other user of the module in this process.
        openai.api_type = "azure"
        openai.api_base = "https://damlaopenai.openai.azure.com/"
        openai.api_version = "2022-12-01"
        openai.api_key = os.getenv("API_KEY")

        # NOTE(review): temperature=0.9 with no stop sequence makes the output
        # non-deterministic and lets the model run past the first answer -
        # confirm this is intended for a response that is parsed as a literal.
        return openai.Completion.create(
            engine="Davinci-003",
            prompt=address_text,
            temperature=0.9,
            max_tokens=256,
            top_p=1.0,
            n=1,
            logprobs=0,
            echo=False,
            stop=None,
            frequency_penalty=0,
            presence_penalty=0,
            best_of=1,
        )
48
+
49
+
50
+ ########################
51
+
52
+ openai.api_key = os.getenv("API_KEY")
53
+ depremOCR = DepremOCR()
54
 
55
 
 
56
def get_parsed_address(input_img):
    """Run OCR on ``input_img`` and parse the recognised text into address fields.

    Args:
        input_img: Image handed to ``get_text``.

    Returns:
        Whatever ``openai_response`` returns for the OCR text.
    """
    address_full_text = get_text(input_img)
    return openai_response(address_full_text)
60
 
61
 
62
def preprocess_img(inp_image):
    """Return an inverted grayscale version of ``inp_image``.

    Args:
        inp_image: Image array passed to ``cv2.cvtColor``.

    Returns:
        The grayscale image with every pixel bit-inverted.
    """
    # NOTE(review): the conversion assumes BGR channel order; if callers pass
    # RGB arrays the gray weights for red and blue are swapped - confirm.
    grayscale = cv2.cvtColor(inp_image, cv2.COLOR_BGR2GRAY)
    return cv2.bitwise_not(grayscale)
66
 
67
 
68
def get_text(input_img):
    """Run the module-level ``depremOCR`` engine on an image and join its output.

    Args:
        input_img: Image to recognise; it is converted with ``np.array`` first.

    Returns:
        The OCR results joined with single spaces.
    """
    # Assumes apply_ocr returns an iterable of strings - TODO confirm.
    result = depremOCR.apply_ocr(np.array(input_img))
    print(result)  # debug trace of the raw OCR output
    return " ".join(result)
72
+
73
+
74
def save_csv(mahalle, il, sokak, apartman):
    """Append one address row to ``adress_book.csv`` in the working directory.

    Args:
        mahalle: Neighbourhood value.
        il: Province value.
        sokak: Street value.
        apartman: Building value.

    Returns:
        The row that was written, as ``[mahalle, il, sokak, apartman]``.
    """
    adres_full = [mahalle, il, sokak, apartman]

    # newline="" is required by the csv module: without it, platforms that
    # translate "\n" emit "\r\r\n" and every record is followed by a blank row.
    with open("adress_book.csv", "a", encoding="utf-8", newline="") as f:
        csv.writer(f).writerow(adres_full)
    return adres_full
81
 
82
 
83
def get_json(mahalle, il, sokak, apartman):
    """Serialise the four address parts as a pretty-printed JSON object.

    Args:
        mahalle: Neighbourhood value.
        il: Province value.
        sokak: Street value.
        apartman: Building value.

    Returns:
        A JSON string indented by four spaces; non-ASCII characters are kept
        as-is rather than escaped.
    """
    adres = {"mahalle": mahalle, "il": il, "sokak": sokak, "apartman": apartman}
    return json.dumps(adres, indent=4, ensure_ascii=False)
87
+
88
+
89
def write_db(data_dict):
    """Insert ``data_dict`` as a new record in the ``deprem-ocr`` Deta Base.

    Args:
        data_dict: Record passed unchanged to ``Base.insert``.
    """
    # The project key comes from the DETA_KEY environment variable and a new
    # client is created on every call.
    deta_key = os.getenv("DETA_KEY")
    deta = Deta(deta_key)

    records = deta.Base("deprem-ocr")
    records.insert(data_dict)
97
 
98
 
99
def text_dict(input):
    """Parse the address-dict text, store it, and split it into form fields.

    Args:
        input: Text of a Python dict literal. The name shadows the builtin but
            is kept because it is the existing parameter name.

    Returns:
        A tuple of eight strings in the order city, distinct, neighbourhood,
        street, address, tel, name_surname, no. Keys missing from the parsed
        dict yield an empty string.

    Raises:
        ValueError, SyntaxError: If non-blank ``input`` is not a valid literal.
    """
    field_order = (
        "city",
        "distinct",
        "neighbourhood",
        "street",
        "address",
        "tel",
        "name_surname",
        "no",
    )

    # A cleared textbox also triggers this handler; there is nothing to parse
    # or store then, so return blank fields instead of crashing in literal_eval.
    if not input or not input.strip():
        return ("",) * len(field_order)

    eval_result = ast.literal_eval(input)
    write_db(eval_result)

    return tuple(str(eval_result.get(key, "")) for key in field_order)
113
 
114
 
115
def openai_response(ocr_input):
    """Ask the completion model to extract address fields from ``ocr_input``.

    Args:
        ocr_input: Free text (OCR output or user-typed) holding an address.

    Returns:
        The dict parsed from the model's answer, with ``"input"`` set to
        ``ocr_input`` and every expected key present (missing ones are ``""``).

    Raises:
        ValueError, SyntaxError: If the model's answer is not a valid literal.
    """
    # NOTE(review): the key 'distinct' looks like a typo for 'district'; it is
    # left unchanged because other code in this file reads the same key.
    prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
    plain text input and especially from emergency text that carries address information, your inputs can be text
    of arbitrary size, but the output should be in [{{'tabular': {{'entity_type': 'entity'}} }}] JSON format Force it
    to only extract keys that are shared as an example in the examples section, if a key value is not found in the
    text input, then it should be ignored. Have only city, distinct, neighbourhood,
    street, no, tel, name_surname, address Examples: Input: Deprem sırasında evimizde yer alan adresimiz: İstanbul,
    Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35, cep telefonu numaram 5551231256, adim Ahmet Yilmaz
    Output: {{'city': 'İstanbul', 'distinct': 'Beşiktaş', 'neighbourhood': 'Yıldız Mahallesi', 'street': 'Cumhuriyet Caddesi', 'no': '35', 'tel': '5551231256', 'name_surname': 'Ahmet Yılmaz', 'address': 'İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35'}}
    Input: {ocr_input}
    Output:
    """

    openai_client = OpenAI_API()
    # Send the few-shot prompt; sending the bare OCR text gives the model no
    # instructions and no output format to follow.
    response = openai_client.single_request(prompt)
    raw_text = response["choices"][0]["text"]
    print(raw_text)  # debug trace of the raw completion

    # The completion is untrusted text, so it is parsed with literal_eval
    # (literals only) rather than eval, which would execute arbitrary code.
    cleaned = raw_text.replace("'{", "{").replace("}'", "}").strip()
    resp = ast.literal_eval(cleaned)

    resp["input"] = ocr_input
    dict_keys = [
        "city",
        "distinct",
        "neighbourhood",
        "street",
        "no",
        "tel",
        "name_surname",
        "address",
        "input",
    ]
    # Guarantee every field exists so downstream lookups never miss a key.
    for key in dict_keys:
        resp.setdefault(key, "")
    return resp
149
 
150
 
 
151
  with gr.Blocks() as demo:
152
  gr.Markdown(
153
  """
 
158
  "Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın."
159
  )
160
  with gr.Row():
161
+ img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
162
+ ocr_result = gr.Textbox(label="Metin yükleyin 👇 ")
 
 
 
 
 
 
163
  open_api_text = gr.Textbox(label="Tam Adres")
164
+ submit_button = gr.Button(label="Yükle")
165
  with gr.Column():
166
  with gr.Row():
167
+ city = gr.Textbox(label="İl")
168
+ distinct = gr.Textbox(label="İlçe")
169
  with gr.Row():
170
+ neighbourhood = gr.Textbox(label="Mahalle")
171
+ street = gr.Textbox(label="Sokak/Cadde/Bulvar")
 
 
 
 
172
  with gr.Row():
173
+ tel = gr.Textbox(label="Telefon")
174
  with gr.Row():
175
+ name_surname = gr.Textbox(label="İsim Soyisim")
176
+ address = gr.Textbox(label="Adres")
 
 
177
  with gr.Row():
178
+ no = gr.Textbox(label="Kapı No")
179
 
180
+ submit_button.click(
181
  get_parsed_address,
182
  inputs=img_area,
183
  outputs=open_api_text,
184
+ api_name="upload_image",
185
  )
186
 
187
+ ocr_result.change(
188
+ openai_response, ocr_result, open_api_text, api_name="upload-text"
189
  )
190
 
 
191
  open_api_text.change(
192
  text_dict,
193
  open_api_text,
194
+ [city, distinct, neighbourhood, street, address, tel, name_surname, no],
 
 
 
 
 
 
 
195
  )
 
 
 
 
 
 
196
 
197
 
198
  if __name__ == "__main__":
199
+ demo.launch()
db_utils.py DELETED
@@ -1,41 +0,0 @@
1
- from deta import Deta # Import Deta
2
- from pprint import pprint
3
- import os
4
-
5
- deta_key = os.getenv("DETA_KEY")
6
- deta = Deta(deta_key)
7
- db = deta.Base("deprem-ocr")
8
-
9
-
10
- def get_users_by_city(city_name, limit=10):
11
-
12
- user = db.fetch({"city": city_name.capitalize()}, limit=limit).items
13
- return user
14
-
15
-
16
- def get_all():
17
- res = db.fetch()
18
- all_items = res.items
19
-
20
- # fetch until last is 'None'
21
- while res.last:
22
- res = db.fetch(last=res.last)
23
- all_items += res.items
24
- return all_items
25
-
26
-
27
- def write_db(data_dict):
28
- # 2) initialize with a project key
29
- deta_key = os.getenv("DETA_KEY")
30
- deta = Deta(deta_key)
31
-
32
- # 3) create and use as many DBs as you want!
33
- users = deta.Base("deprem-ocr")
34
- users.insert(data_dict)
35
- print("Pushed to db")
36
-
37
-
38
- def get_latest_row(last):
39
- all_items = get_all()
40
- latest_items = all_items[-last:]
41
- return latest_items
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
openai_api.py DELETED
@@ -1,31 +0,0 @@
1
- import openai
2
- import os
3
-
4
-
5
- class OpenAI_API:
6
- def __init__(self):
7
- self.openai_api_key = ""
8
-
9
- def single_request(self, address_text):
10
-
11
- openai.api_type = "azure"
12
- openai.api_base = "https://afet-org.openai.azure.com/"
13
- openai.api_version = "2022-12-01"
14
- openai.api_key = os.getenv("API_KEY")
15
-
16
- response = openai.Completion.create(
17
- engine="afet-org",
18
- prompt=address_text,
19
- temperature=0.0,
20
- max_tokens=500,
21
- top_p=1,
22
- # n=1,
23
- # logprobs=0,
24
- # echo=False,
25
- stop=["\n"],
26
- frequency_penalty=0,
27
- presence_penalty=0,
28
- # best_of=1,
29
- )
30
-
31
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,6 +1,14 @@
 
 
 
 
 
 
 
 
 
 
1
  openai
2
  Pillow
3
- easyocr
4
  gradio
5
- deta
6
- transformers
 
1
+ paddlepaddle
2
+ opencv-python
3
+ Pillow
4
+ numpy==1.23.3
5
+ pandas
6
+ imutils
7
+ Cython
8
+ imgaug
9
+ pyclipper
10
+ deprem_ocr
11
  openai
12
  Pillow
 
13
  gradio
14
+ deta
 
utils.py DELETED
@@ -1,53 +0,0 @@
1
- import cv2
2
- import csv
3
- import json
4
- from deta import Deta
5
- import os
6
- import requests
7
-
8
-
9
- def preprocess_img(inp_image):
10
- gray = cv2.cvtColor(inp_image, cv2.COLOR_BGR2GRAY)
11
- gray_img = cv2.bitwise_not(gray)
12
- return gray_img
13
-
14
-
15
- def save_csv(mahalle, il, sokak, apartman):
16
- adres_full = [mahalle, il, sokak, apartman]
17
-
18
- with open("adress_book.csv", "a", encoding="utf-8") as f:
19
- write = csv.writer(f)
20
- write.writerow(adres_full)
21
- return adres_full
22
-
23
-
24
- def get_json(mahalle, il, sokak, apartman):
25
- adres = {"mahalle": mahalle, "il": il, "sokak": sokak, "apartman": apartman}
26
- dump = json.dumps(adres, indent=4, ensure_ascii=False)
27
- return dump
28
-
29
-
30
- def write_db(data_dict):
31
- # 2) initialize with a project key
32
- deta_key = os.getenv("DETA_KEY")
33
- deta = Deta(deta_key)
34
-
35
- # 3) create and use as many DBs as you want!
36
- users = deta.Base("deprem-ocr")
37
- users.insert(data_dict)
38
-
39
-
40
- def ner_response(ocr_input):
41
- API_URL = "https://api-inference.huggingface.co/models/deprem-ml/deprem-ner"
42
- headers = {"Authorization": "Bearer xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
43
-
44
- def query(payload):
45
- response = requests.post(API_URL, headers=headers, json=payload)
46
- return response.json()
47
-
48
- output = query(
49
- {
50
- "inputs": ocr_input,
51
- }
52
- )
53
- return output