Spaces:
Sleeping
Sleeping
Commit
·
3ec8945
0
Parent(s):
Duplicate from deprem-ml/deprem-ocr
Browse files
Co-authored-by: Mert Cobanov <mertcobanov@users.noreply.huggingface.co>
- .gitignore +162 -0
- README.md +13 -0
- app.py +174 -0
- db_utils.py +41 -0
- openai_api.py +31 -0
- requirements.txt +5 -0
- utils.py +53 -0
.gitignore
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
161 |
+
|
162 |
+
.DS_Store
|
README.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Deprem OCR
|
3 |
+
emoji: 👀
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.17.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: true
|
10 |
+
duplicated_from: deprem-ml/deprem-ocr
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PIL import ImageFilter, Image
|
2 |
+
from easyocr import Reader
|
3 |
+
import gradio as gr
|
4 |
+
import numpy as np
|
5 |
+
import openai
|
6 |
+
import ast
|
7 |
+
import os
|
8 |
+
|
9 |
+
from openai_api import OpenAI_API
|
10 |
+
import utils
|
11 |
+
|
12 |
+
openai.api_key = os.getenv("API_KEY")
|
13 |
+
reader = Reader(["tr"])
|
14 |
+
|
15 |
+
|
16 |
+
def get_text(input_img):
    """Run OCR on an image and return the recognised text as one string.

    Args:
        input_img: Array-like image accepted by ``Image.fromarray``, or
            ``None`` when no image was supplied.

    Returns:
        The paragraphs returned by the module-level ``reader`` (configured
        for Turkish), joined with single spaces. ``""`` when ``input_img``
        is ``None``.
    """
    # NOTE(review): the image component presumably passes None when nothing
    # has been uploaded -- confirm. Without this guard Image.fromarray fails
    # with an opaque attribute error instead of yielding "no text".
    if input_img is None:
        return ""

    img = Image.fromarray(input_img)
    # Apply PIL's DETAIL filter before handing the pixels to the OCR reader.
    detailed = np.asarray(img.filter(ImageFilter.DETAIL))
    result = reader.readtext(detailed, detail=0, paragraph=True)
    return " ".join(result)
|
21 |
+
|
22 |
+
|
23 |
+
# Submit button
|
24 |
+
def get_parsed_address(input_img):
    """Callback for the image button: OCR the image, then parse the address.

    Args:
        input_img: Image forwarded unchanged to ``get_text``.

    Returns:
        Whatever ``openai_response`` returns for the OCR'd text.
    """
    return openai_response(get_text(input_img))
|
28 |
+
|
29 |
+
|
30 |
+
def save_deta_db(input):
    """Parse the dict literal held in ``input`` and store it in the database.

    Args:
        input: String containing a Python literal; it is decoded with
            ``ast.literal_eval`` and handed to ``utils.write_db``.

    Returns:
        None.
    """
    record = ast.literal_eval(input)
    utils.write_db(record)
|
34 |
+
|
35 |
+
|
36 |
+
def update_component():
    """Return a Gradio update that shows the confirmation message.

    The message is Turkish for "Sent, thank you."; the target component is
    also made visible.
    """
    confirmation = "Gönderildi, teşekkürler."
    return gr.update(value=confirmation, visible=True)
|
38 |
+
|
39 |
+
|
40 |
+
def clear_textbox(value):
    """Return a Gradio update that empties a textbox.

    Args:
        value: Current textbox content; accepted but not used.
    """
    emptied = ""
    return gr.update(value=emptied)
|
42 |
+
|
43 |
+
|
44 |
+
# Open API on change
|
45 |
+
def text_dict(input):
    """Split a parsed-address dict literal into its individual field values.

    Args:
        input: String containing a Python dict literal that holds every key
            named in ``field_order`` below.

    Returns:
        An 8-tuple of strings ordered as city, distinct, neighbourhood,
        street, address, tel, name_surname, no.

    Raises:
        KeyError: If one of the expected keys is missing.
        ValueError, SyntaxError: Propagated from ``ast.literal_eval`` when
            the text is not a valid literal.
    """
    # The order here defines the order of the returned tuple.
    field_order = (
        "city",
        "distinct",
        "neighbourhood",
        "street",
        "address",
        "tel",
        "name_surname",
        "no",
    )
    parsed = ast.literal_eval(input)
    return tuple(str(parsed[name]) for name in field_order)
|
57 |
+
|
58 |
+
|
59 |
+
def openai_response(ocr_input):
    """Ask the completion model to extract address fields from free text.

    A few-shot prompt is built around ``ocr_input`` and sent through
    ``OpenAI_API.single_request``; the first completion is decoded as a
    Python dict literal.

    Args:
        ocr_input: Raw text (OCR output or user-typed message) to parse.

    Returns:
        dict containing the keys city, distinct, neighbourhood, street, no,
        tel, name_surname and address (``""`` for any the model omitted),
        plus ``"input"`` holding the original ``ocr_input``.

    Raises:
        ValueError, SyntaxError: If the completion text is not a valid
            Python literal.
    """
    prompt = f"""Tabular Data Extraction You are a highly intelligent and accurate tabular data extractor from
plain text input and especially from emergency text that carries address information, your inputs can be text
of arbitrary size, but the output should be in [{{'tabular': {{'entity_type': 'entity'}} }}] JSON format Force it
to only extract keys that are shared as an example in the examples section, if a key value is not found in the
text input, then it should be ignored. Have only city, distinct, neighbourhood,
street, no, tel, name_surname, address Examples:

Input: Deprem sırasında evimizde yer alan adresimiz: İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35, cep telefonu numaram 5551231256, adim Ahmet Yilmaz
Output: {{'city': 'İstanbul', 'distinct': 'Beşiktaş', 'neighbourhood': 'Yıldız Mahallesi', 'street': 'Cumhuriyet Caddesi', 'no': '35', 'tel': '5551231256', 'name_surname': 'Ahmet Yılmaz', 'address': 'İstanbul, Beşiktaş, Yıldız Mahallesi, Cumhuriyet Caddesi No: 35'}}

Input: 5.29 PMO $ 0 87 DEVREMİZ ÖZGÜR ORÇAN ARKADAŞIMIZA ULAŞAMIYORUZ BEYOĞLU MAH FEVZİ ÇAKMAK CAD. NO.58-TÜRKOĞLUI KAHRAMANMARAŞ 5524357578 AdReSe YaKIN OLANLAR VEYA ULASANLAR LÜTFEN BiLGILENDIRSIN .
Output: {{'city': 'Kahramanmaraş', 'distinct': 'Türkoğlu', 'neighbourhood': 'Beyoğlu Mahallesi', 'street': 'Çakmak Caddesi', 'no': '58', 'tel': '5524357578', 'name_surname': 'Özgür Orçan', 'address': 'Beyoğlu Mahallesi, Çakmak Caddesi, No:58 Türkoğlu/Kahramanmaraş'}}

Input: Ahmet @ozknhmt Ekim 2021 tarihinde katıldı - 2 Takipçi Takip ettiğin kimse takip etmiyor AKEVLER MAH. 432SK RÜYA APT ANT(BEDİİ SABUNCU KARŞISI) ANTAKYA HATAY MERVE BELANLI ses veriyor ancak hiçbiryardım ekibi olmadığı için kurtaramryoruz içeri girip, lütfen acil yardım_ İsim: Merve Belanlı tel 542 757 5484 Ö0 12.07
Output: {{'city': 'Hatay', 'distinct': 'Antakya', 'neighbourhood': 'Akevler Mahallesi', 'street': '432 Sokak', 'no': '', 'tel': '5427575484', 'name_surname': 'Merve Belanlı', 'address': 'Akevler Mahallesi, 432 Sokak, Rüya Apt. Antakya/Hatay'}}

Input: 14:04 Sümerler Cemil Şükrü Çolokoğlu ilköğretim okulu karşısı 3 9öçük altında yardım bekyouk Lütfen herkes paylogsın
Output: {{'city': '', 'distinct': '', 'neighbourhood': 'Sümerler Mahallesi', 'street': 'Cemil Şükrü Çolokoğlu İlköğretim Okulu Karşısı', 'no': '', 'tel': '', 'name_surname': '', 'address': 'Sümerler Mahallesi, Cemil Şükrü Çolokoğlu İlköğretim Okulu Karşısı'}}

Input: {ocr_input}
Output:
"""

    openai_client = OpenAI_API()
    response = openai_client.single_request(prompt)
    resp = response["choices"][0]["text"]
    print(resp)

    # SECURITY: the completion is steered by untrusted OCR/user text, so it
    # must never reach eval(). ast.literal_eval only accepts plain literals.
    # The replace() calls unwrap a dict the model may have put inside quotes;
    # strip() is needed because literal_eval rejects leading whitespace on
    # Python < 3.10.
    literal = resp.replace("'{", "{").replace("}'", "}").strip()
    resp = ast.literal_eval(literal)
    resp["input"] = ocr_input

    # The form downstream indexes every field, so backfill any the model left out.
    expected_keys = (
        "city",
        "distinct",
        "neighbourhood",
        "street",
        "no",
        "tel",
        "name_surname",
        "address",
    )
    for key in expected_keys:
        resp.setdefault(key, "")
    return resp
|
104 |
+
|
105 |
+
|
106 |
+
# User Interface
|
107 |
+
with gr.Blocks() as demo:
|
108 |
+
gr.Markdown(
|
109 |
+
"""
|
110 |
+
# Enkaz Bildirme Uygulaması
|
111 |
+
"""
|
112 |
+
)
|
113 |
+
gr.Markdown(
|
114 |
+
"Bu uygulamada ekran görüntüsü sürükleyip bırakarak AFAD'a enkaz bildirimi yapabilirsiniz. Mesajı metin olarak da girebilirsiniz, tam adresi ayrıştırıp döndürür. API olarak kullanmak isterseniz sayfanın en altında use via api'ya tıklayın."
|
115 |
+
)
|
116 |
+
with gr.Row():
|
117 |
+
with gr.Column():
|
118 |
+
img_area = gr.Image(label="Ekran Görüntüsü yükleyin 👇")
|
119 |
+
img_area_button = gr.Button(value="Görüntüyü İşle", label="Submit")
|
120 |
+
|
121 |
+
with gr.Column():
|
122 |
+
text_area = gr.Textbox(label="Metin yükleyin 👇 ", lines=8)
|
123 |
+
text_area_button = gr.Button(value="Metni Yükle", label="Submit")
|
124 |
+
|
125 |
+
open_api_text = gr.Textbox(label="Tam Adres")
|
126 |
+
|
127 |
+
with gr.Column():
|
128 |
+
with gr.Row():
|
129 |
+
city = gr.Textbox(label="İl", interactive=True, show_progress=False)
|
130 |
+
distinct = gr.Textbox(label="İlçe", interactive=True, show_progress=False)
|
131 |
+
with gr.Row():
|
132 |
+
neighbourhood = gr.Textbox(
|
133 |
+
label="Mahalle", interactive=True, show_progress=False
|
134 |
+
)
|
135 |
+
street = gr.Textbox(
|
136 |
+
label="Sokak/Cadde/Bulvar", interactive=True, show_progress=False
|
137 |
+
)
|
138 |
+
with gr.Row():
|
139 |
+
tel = gr.Textbox(label="Telefon", interactive=True, show_progress=False)
|
140 |
+
with gr.Row():
|
141 |
+
name_surname = gr.Textbox(
|
142 |
+
label="İsim Soyisim", interactive=True, show_progress=False
|
143 |
+
)
|
144 |
+
address = gr.Textbox(label="Adres", interactive=True, show_progress=False)
|
145 |
+
with gr.Row():
|
146 |
+
no = gr.Textbox(label="Kapı No", interactive=True, show_progress=False)
|
147 |
+
|
148 |
+
img_area_button.click(
|
149 |
+
get_parsed_address,
|
150 |
+
inputs=img_area,
|
151 |
+
outputs=open_api_text,
|
152 |
+
api_name="upload-image",
|
153 |
+
)
|
154 |
+
|
155 |
+
text_area_button.click(
|
156 |
+
openai_response, text_area, open_api_text, api_name="upload-text"
|
157 |
+
)
|
158 |
+
|
159 |
+
open_api_text.change(
|
160 |
+
text_dict,
|
161 |
+
open_api_text,
|
162 |
+
[city, distinct, neighbourhood, street, address, tel, name_surname, no],
|
163 |
+
)
|
164 |
+
|
165 |
+
submit_button = gr.Button(value="Veriyi Birimlere Yolla")
|
166 |
+
submit_button.click(save_deta_db, open_api_text)
|
167 |
+
done_text = gr.Textbox(label="Done", value="Not Done", visible=False)
|
168 |
+
submit_button.click(update_component, outputs=done_text)
|
169 |
+
for txt in [city, distinct, neighbourhood, street, address, tel, name_surname, no]:
|
170 |
+
submit_button.click(fn=clear_textbox, inputs=txt, outputs=txt)
|
171 |
+
|
172 |
+
|
173 |
+
if __name__ == "__main__":
|
174 |
+
demo.launch()
|
db_utils.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from deta import Deta # Import Deta
|
2 |
+
from pprint import pprint
|
3 |
+
import os
|
4 |
+
|
5 |
+
deta_key = os.getenv("DETA_KEY")
|
6 |
+
deta = Deta(deta_key)
|
7 |
+
db = deta.Base("deprem-ocr")
|
8 |
+
|
9 |
+
|
10 |
+
def get_users_by_city(city_name, limit=10):
    """Fetch records whose ``city`` field matches the given name.

    Args:
        city_name: City to look up; ``str.capitalize()`` is applied before
            querying (first letter upper-case, the rest lower-case).
        limit: Forwarded to ``db.fetch`` as its ``limit`` argument.

    Returns:
        The ``items`` of the fetch response.
    """
    query = {"city": city_name.capitalize()}
    response = db.fetch(query, limit=limit)
    return response.items
|
14 |
+
|
15 |
+
|
16 |
+
def get_all():
    """Return every record in the database, following pagination.

    Returns:
        The items of the first fetch, extended in place with the items of
        each follow-up page.
    """
    page = db.fetch()
    collected = page.items
    cursor = page.last

    # A falsy ``last`` marks the final page.
    while cursor:
        page = db.fetch(last=cursor)
        collected += page.items
        cursor = page.last
    return collected
|
25 |
+
|
26 |
+
|
27 |
+
def write_db(data_dict):
    """Insert ``data_dict`` into the "deprem-ocr" Deta Base.

    A fresh client is created on every call using the ``DETA_KEY``
    environment variable.

    Args:
        data_dict: Record passed to ``Base.insert``.
    """
    project_key = os.getenv("DETA_KEY")
    base = Deta(project_key).Base("deprem-ocr")
    base.insert(data_dict)
    print("Pushed to db")
|
36 |
+
|
37 |
+
|
38 |
+
def get_latest_row(last):
    """Return the final ``last`` records as returned by ``get_all``.

    Args:
        last: Number of trailing records wanted.

    Returns:
        A list with at most ``last`` items taken from the end of
        ``get_all()``; an empty list when ``last`` is zero or negative.
    """
    # ``items[-0:]`` is the whole list and a negative count would slice from
    # the front, so non-positive requests are answered without a fetch.
    if last <= 0:
        return []
    return get_all()[-last:]
|
openai_api.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import openai
|
2 |
+
import os
|
3 |
+
|
4 |
+
|
5 |
+
class OpenAI_API:
    """Small wrapper that sends completion requests to an Azure OpenAI deployment."""

    def __init__(self):
        # Not read by single_request, which takes the key from the
        # API_KEY environment variable instead.
        self.openai_api_key = ""

    def single_request(self, address_text):
        """Send ``address_text`` as a completion prompt and return the response.

        The module-level ``openai`` settings (type, base URL, version, key)
        are assigned on every call before the request is made.

        Args:
            address_text: Full prompt text.

        Returns:
            The object returned by ``openai.Completion.create``.
        """
        openai.api_type = "azure"
        openai.api_base = "https://damlaopenai.openai.azure.com/"
        openai.api_version = "2022-12-01"
        openai.api_key = os.getenv("API_KEY")

        completion_args = {
            "engine": "Davinci-003",
            "prompt": address_text,
            "temperature": 0.0,
            "max_tokens": 500,
            "top_p": 1,
            # Generation stops at the first newline.
            "stop": ["\n"],
            "frequency_penalty": 0,
            "presence_penalty": 0,
        }
        return openai.Completion.create(**completion_args)
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai
|
2 |
+
Pillow
|
3 |
+
easyocr
|
4 |
+
gradio
|
5 |
+
deta
|
utils.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import csv
|
3 |
+
import json
|
4 |
+
from deta import Deta
|
5 |
+
import os
|
6 |
+
import requests
|
7 |
+
|
8 |
+
|
9 |
+
def preprocess_img(inp_image):
    """Convert an image to grayscale and invert it.

    Args:
        inp_image: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.

    Returns:
        The bitwise-inverted grayscale image.
    """
    return cv2.bitwise_not(cv2.cvtColor(inp_image, cv2.COLOR_BGR2GRAY))
|
13 |
+
|
14 |
+
|
15 |
+
def save_csv(mahalle, il, sokak, apartman):
    """Append one address row to ``adress_book.csv`` and return it.

    Args:
        mahalle: Neighbourhood.
        il: Province.
        sokak: Street.
        apartman: Apartment / building.

    Returns:
        The row that was written, as ``[mahalle, il, sokak, apartman]``.
    """
    adres_full = [mahalle, il, sokak, apartman]

    # newline="" lets the csv module control line endings itself; without it
    # platforms that translate "\n" end up with a blank line after every row.
    with open("adress_book.csv", "a", encoding="utf-8", newline="") as f:
        csv.writer(f).writerow(adres_full)
    return adres_full
|
22 |
+
|
23 |
+
|
24 |
+
def get_json(mahalle, il, sokak, apartman):
    """Serialise the four address parts as a pretty-printed JSON object.

    Returns:
        JSON text with keys ``mahalle``, ``il``, ``sokak``, ``apartman`` (in
        that order), indented by four spaces, with non-ASCII characters left
        unescaped.
    """
    adres = dict(mahalle=mahalle, il=il, sokak=sokak, apartman=apartman)
    return json.dumps(adres, indent=4, ensure_ascii=False)
|
28 |
+
|
29 |
+
|
30 |
+
def write_db(data_dict):
    """Insert ``data_dict`` into the "deprem-ocr" Deta Base.

    The client is built on each call from the ``DETA_KEY`` environment
    variable.

    Args:
        data_dict: Record passed to ``Base.insert``.
    """
    project_key = os.getenv("DETA_KEY")
    base = Deta(project_key).Base("deprem-ocr")
    base.insert(data_dict)
|
38 |
+
|
39 |
+
|
40 |
+
def ner_response(ocr_input):
    """Send text to the hosted ``deprem-ml/deprem-ner`` model and return its reply.

    Args:
        ocr_input: Text posted as the ``inputs`` field of the JSON payload.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        requests.exceptions.Timeout: If the service does not answer within
            the timeout below.
    """
    API_URL = "https://api-inference.huggingface.co/models/deprem-ml/deprem-ner"
    # NOTE(review): this is a placeholder token; a real credential has to be
    # supplied from configuration and must not be committed here.
    headers = {"Authorization": "Bearer xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}

    # requests never times out by default; bound the wait so a stalled
    # endpoint cannot hang the caller forever.
    response = requests.post(
        API_URL,
        headers=headers,
        json={"inputs": ocr_input},
        timeout=60,
    )
    return response.json()
|