# test_gradio / app.py
# NOTE: Hugging Face Spaces page scrape residue removed from this header
# (revision 740b8c2, "Update app.py", ~4.8 kB) — it was not valid Python.
import os
import re

import gradio as gr

# NOTE(review): this file also references pt (pytesseract), DocumentFile,
# OCRpredictor, happy_tt and grammar_args, which are never imported/defined
# here — confirm they are provided elsewhere before deploying.
def correct_spell(inputs):
    """Placeholder spell-checker: ignores *inputs* and returns a fixed string.

    The real spell-check model is not wired up in this build.
    """
    result = "res"
    return result
def process_text_in_chunks(text, process_function, max_chunk_size=256):
    """Apply *process_function* to *text* in pieces of at most *max_chunk_size* chars.

    The text is first split into sentences (after ``.``, ``!`` or ``?``
    followed by whitespace); any sentence longer than *max_chunk_size* is
    further sliced into fixed-size chunks. A single space is appended after
    each sentence's processed output, and the final result is stripped.

    Fixes: builds the result with ``str.join`` instead of repeated ``+=``
    (which is quadratic in CPython-independent terms). Requires the
    module-level ``import re``.
    """
    sentences = re.split(r'(?<=[.!?])\s+', text)
    pieces = []
    for sentence in sentences:
        # Slice long sentences into max_chunk_size-sized windows.
        for start in range(0, len(sentence), max_chunk_size):
            pieces.append(process_function(sentence[start:start + max_chunk_size]))
        pieces.append(" ")  # space after each processed sentence
    return "".join(pieces).strip()
def greet(img, apply_grammar_correction, apply_spell_check, lang_of_input):
    """OCR an image and optionally post-process the recognized text.

    Hindi/Punjabi go through Tesseract (``pt.image_to_string``) with the
    matching trained-data code; any other selection (English) goes through
    the docTR predictor, optionally followed by grammar correction and
    spell check. Returns ``(text, path)`` where *path* is a file holding
    the same text, as expected by the Gradio ``["text", "file"]`` outputs.

    Fixes vs. original: file handles are closed via ``with`` (they were
    leaked three times), and the duplicated Hindi/Punjabi branches are
    collapsed into one table-driven branch.
    """
    _output_name = "RESULT_OCR.txt"

    # Indic languages: Tesseract path. Grammar/spell post-processing is
    # intentionally skipped here, matching the original early-return flow.
    tesseract_codes = {"Hindi": "hin", "Punjabi": "pan"}
    if lang_of_input in tesseract_codes:
        res = pt.image_to_string(img, lang=tesseract_codes[lang_of_input])
        with open(_output_name, 'w') as f:
            f.write(res)
        return res, _output_name

    # English path: run the docTR OCR predictor on the saved image.
    img.save("out.jpg")
    doc = DocumentFile.from_images("out.jpg")
    output = OCRpredictor(doc)
    res = ""
    # Rebuild text preserving the original spacing: a leading space before
    # every word, a newline after every line and after every block.
    for page in output.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    res += " " + word.value
                res += "\n"
            res += "\n"
    # Process in chunks for grammar correction
    if apply_grammar_correction:
        res = process_text_in_chunks(res, lambda x: happy_tt.generate_text("grammar: " + x, args=grammar_args).text)
    # Process in chunks for spell check
    if apply_spell_check:
        res = process_text_in_chunks(res, correct_spell)
    with open(_output_name, 'w') as f:
        f.write(res)
    return res, _output_name
# Gradio Interface for OCR: image in, recognized text + downloadable file out.
_ocr_inputs = [
    gr.Image(type="pil"),
    gr.Checkbox(label="Apply Grammar Correction"),
    gr.Checkbox(label="Apply Spell Check"),
    gr.Dropdown(["English", "Hindi", "Punjabi"], label="Select Language"),
]
demo_ocr = gr.Interface(
    fn=greet,
    inputs=_ocr_inputs,
    outputs=["text", "file"],
    title="DocTR OCR with Grammar and Spell Check",
    description="Upload an image to get the OCR results. Optionally, apply grammar and spell check.",
)
# demo_ocr.launch(debug=True)
def split_text_into_batches(text, max_tokens_per_batch):
sentences = text # Tokenize text into sentences
batches = []
current_batch = ""
for sentence in sentences:
if len(current_batch) + len(sentence) + 1 <= max_tokens_per_batch: # Add 1 for space
current_batch += sentence + " " # Add sentence to current batch
else:
batches.append(current_batch.strip()) # Add current batch to batches list
current_batch = sentence + " " # Start a new batch with the current sentence
if current_batch:
batches.append(current_batch.strip()) # Add the last batch
return batches
def run_t2tt(file_uploader, input_text: str, source_language: str, target_language: str):
    """Translate *input_text* (or the uploaded file's contents) and return
    ``(translated_text, output_file_path)``.

    Bug fix: the original returned the single string ``"hello"`` while the
    ``gr.on`` wiring expects TWO outputs (``output_text``, ``output_file``),
    which fails at click time. It also left ``source_language_code`` /
    ``target_language_code`` as dead empty lists.

    NOTE(review): the actual translation model call is missing from this
    build (source_language / target_language are currently unused); each
    batch is passed through unchanged as a placeholder — wire in the real
    model here.
    """
    if file_uploader is not None:
        # An uploaded file overrides the text box contents.
        with open(file_uploader, 'r') as file:
            input_text = file.read()
    max_tokens_per_batch = 256
    batches = split_text_into_batches(input_text, max_tokens_per_batch)
    # Placeholder "translation": identity per batch.
    translated_text = " ".join(batches).strip()
    _output_name = "RESULT_translation.txt"
    with open(_output_name, 'w') as f:
        f.write(translated_text)
    return translated_text, _output_name
# Translation tab UI. An uploaded text file (optional) overrides the text box;
# submitting the textbox or clicking the button triggers run_t2tt.
with gr.Blocks() as demo_t2tt:
    with gr.Row():
        with gr.Column():
            with gr.Group():
                file_uploader = gr.File(label="Upload a text file (Optional)")
                input_text = gr.Textbox(label="Input text")
                with gr.Row():
                    # NOTE(review): choices=[] leaves both dropdowns empty, and
                    # value="Punjabi" is not among the (empty) choices — these
                    # presumably need to be populated with the supported
                    # language lists before this tab is usable; confirm.
                    source_language = gr.Dropdown(
                        label="Source language",
                        choices=[],
                        value="Punjabi",
                    )
                    target_language = gr.Dropdown(
                        label="Target language",
                        choices=[],
                        value=[],
                    )
                btn = gr.Button("Translate")
        with gr.Column():
            output_text = gr.Textbox(label="Translated text")
            output_file = gr.File(label="Translated text file")
    # Wire both triggers (Enter in the textbox, button click) to run_t2tt;
    # run_t2tt must return two values to fill output_text and output_file.
    gr.on(
        triggers=[input_text.submit, btn.click],
        fn=run_t2tt,
        inputs=[file_uploader, input_text, source_language, target_language],
        outputs=[output_text, output_file],
        api_name="t2tt",
    )
# Top-level app: one tab per sub-demo, rendered from a small table.
_tab_specs = (("OCR", demo_ocr), ("Translate", demo_t2tt))
with gr.Blocks() as demo:
    with gr.Tabs():
        for _tab_label, _tab_demo in _tab_specs:
            with gr.Tab(label=_tab_label):
                _tab_demo.render()

if __name__ == "__main__":
    demo.launch()