Spaces:
Sleeping
Sleeping
sotirios-slv
committed on
Commit
•
858ad43
1
Parent(s):
d8d40d6
Testing token POS function
Browse files
app.py
CHANGED
@@ -19,14 +19,13 @@ def get_named_entities(ocr_text: str):
|
|
19 |
sentence = [Sentence(sent, use_tokenizer=True) for sent in split_single(ocr_text)]
|
20 |
tagger.predict(sentence)
|
21 |
|
22 |
-
# entities = [entity for entity in sent.get_spans("ner") for sent in sentence]
|
23 |
entities = []
|
24 |
|
25 |
-
for
|
26 |
-
for entity in
|
27 |
entities.append(entity)
|
|
|
28 |
|
29 |
-
print("Entities ", entities)
|
30 |
return entities
|
31 |
|
32 |
|
@@ -51,7 +50,6 @@ def get_named_entities(ocr_text: str):
|
|
51 |
|
52 |
|
53 |
def run(image, lang="eng"):
|
54 |
-
print("Lang ", lang)
|
55 |
result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
|
56 |
|
57 |
ner = get_named_entities(result)
|
@@ -63,7 +61,7 @@ with gr.Blocks() as demo:
|
|
63 |
with gr.Row():
|
64 |
with gr.Column():
|
65 |
image_in = gr.Image(type="pil")
|
66 |
-
lang = gr.Dropdown(choices)
|
67 |
btn = gr.Button("Run")
|
68 |
with gr.Column():
|
69 |
text_out = gr.TextArea()
|
|
|
19 |
sentence = [Sentence(sent, use_tokenizer=True) for sent in split_single(ocr_text)]
|
20 |
tagger.predict(sentence)
|
21 |
|
|
|
22 |
entities = []
|
23 |
|
24 |
+
for token in sentence:
|
25 |
+
for entity in token.get_spans("ner"):
|
26 |
entities.append(entity)
|
27 |
+
print(token.get_tag("pos").value)
|
28 |
|
|
|
29 |
return entities
|
30 |
|
31 |
|
|
|
50 |
|
51 |
|
52 |
def run(image, lang="eng"):
|
|
|
53 |
result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
|
54 |
|
55 |
ner = get_named_entities(result)
|
|
|
61 |
with gr.Row():
|
62 |
with gr.Column():
|
63 |
image_in = gr.Image(type="pil")
|
64 |
+
lang = gr.Dropdown(choices, value="eng")
|
65 |
btn = gr.Button("Run")
|
66 |
with gr.Column():
|
67 |
text_out = gr.TextArea()
|