Spaces:

wddw12332e
/

test_gradio

Runtime error

App Files Files Community

wddw12332e committed on Apr 18, 2024

Commit

4ff6dfe

verified ·

1 Parent(s): 7267a3a

Create app.py

Browse files

Files changed (1) hide show

app.py +142 -0

app.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import os
def correct_spell(inputs):
    """Placeholder spell checker.

    The argument is accepted but never inspected; the fixed string
    ``"res"`` is returned for every call.
    """
    placeholder_result = "res"
    return placeholder_result
def process_text_in_chunks(text, process_function, max_chunk_size=256):
    """Apply ``process_function`` to ``text`` piece by piece and rejoin the results.

    The text is split into sentences at whitespace that follows ``.``, ``!``
    or ``?``. Each sentence is then cut into consecutive slices of at most
    ``max_chunk_size`` characters (a slice boundary may fall inside a word).
    Every slice is passed to ``process_function``; the results for one
    sentence are concatenated directly, and sentences are joined with a
    single space.

    Args:
        text: The string to process.
        process_function: Callable taking one string chunk and returning a string.
        max_chunk_size: Maximum number of characters per chunk; must be >= 1.

    Returns:
        The processed text with leading and trailing whitespace removed.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1.
    """
    # Imported locally because the module's import section does not provide `re`.
    import re

    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be at least 1")

    processed_sentences = []
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        # Long sentences are sliced so that no chunk exceeds the size limit.
        processed_sentences.append(
            "".join(
                process_function(sentence[start:start + max_chunk_size])
                for start in range(0, len(sentence), max_chunk_size)
            )
        )
    return " ".join(processed_sentences).strip()
def _save_ocr_text(text, output_name="RESULT_OCR.txt"):
    """Write ``text`` to ``output_name`` as UTF-8 and return the file name."""
    # Explicit UTF-8 so Hindi/Punjabi output does not depend on the locale
    # default encoding; the context manager guarantees the file is closed.
    with open(output_name, "w", encoding="utf-8") as handle:
        handle.write(text)
    return output_name


def greet(img, apply_grammar_correction, apply_spell_check, lang_of_input):
    """Run OCR on ``img`` and return the recognised text plus a text-file path.

    For ``"Hindi"`` and ``"Punjabi"`` the image is passed to
    ``pt.image_to_string`` with the language codes ``hin`` / ``pan`` and the
    raw result is returned; the grammar and spell-check flags are ignored on
    that path. Any other ``lang_of_input`` value (including ``None``) saves
    the image to ``out.jpg``, runs ``OCRpredictor`` on it and optionally
    post-processes the text.

    NOTE(review): ``pt``, ``DocumentFile``, ``OCRpredictor``, ``happy_tt`` and
    ``grammar_args`` are not defined in the visible part of this file; they
    must exist at module level for this function to run.

    Args:
        img: Image to recognise; the interface passes a PIL image.
        apply_grammar_correction: When truthy, run the text through
            ``happy_tt.generate_text`` with a ``"grammar: "`` prefix.
        apply_spell_check: When truthy, run the text through ``correct_spell``.
        lang_of_input: Language label selected in the UI.

    Returns:
        A ``(text, file_name)`` tuple; the file is ``RESULT_OCR.txt`` and
        contains the same text.
    """
    tesseract_lang = {"Hindi": "hin", "Punjabi": "pan"}.get(lang_of_input)
    if tesseract_lang is not None:
        res = pt.image_to_string(img, lang=tesseract_lang)
        return res, _save_ocr_text(res)

    # JPEG cannot store an alpha channel or a palette, so such images are
    # converted before saving instead of failing inside ``save``.
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")
    img.save("out.jpg")
    doc = DocumentFile.from_images("out.jpg")
    output = OCRpredictor(doc)

    # Each word is prefixed with a space; one newline closes every block and
    # one more closes every page.
    parts = []
    for page in output.pages:
        for block in page.blocks:
            for line in block.lines:
                parts.extend(" " + word.value for word in line.words)
            parts.append("\n")
        parts.append("\n")
    res = "".join(parts)

    # Process in chunks for grammar correction
    if apply_grammar_correction:
        res = process_text_in_chunks(
            res,
            lambda x: happy_tt.generate_text("grammar: " + x, args=grammar_args).text,
        )
    # Process in chunks for spell check
    if apply_spell_check:
        res = process_text_in_chunks(res, correct_spell)

    return res, _save_ocr_text(res)
# OCR tab: an image upload, two post-processing toggles and a language
# selector feed `greet`, which yields the recognised text and a text file.
demo_ocr = gr.Interface(
    title="DocTR OCR with Grammar and Spell Check",
    description="Upload an image to get the OCR results. Optionally, apply grammar and spell check.",
    fn=greet,
    inputs=[
        gr.Image(type="pil"),
        gr.Checkbox(label="Apply Grammar Correction"),
        gr.Checkbox(label="Apply Spell Check"),
        gr.Dropdown(choices=["English", "Hindi", "Punjabi"], label="Select Language"),
    ],
    outputs=["text", "file"],
)
def split_text_into_batches(text, max_tokens_per_batch):
    """Group the sentences of ``text`` into batches bounded by a character budget.

    A string is first split into sentences at whitespace that follows ``.``,
    ``!`` or ``?``; any other iterable is treated as an already-split
    sequence of sentences. Sentences are packed greedily: a sentence joins
    the current batch while the batch length, counting one separator space
    per sentence, stays within ``max_tokens_per_batch``. Despite the
    parameter name, the budget is measured in characters.

    A single sentence longer than the budget is not split; it becomes a
    batch of its own. Empty sentences are skipped and no empty batch is
    ever emitted.

    Args:
        text: A string, or an iterable of sentence strings.
        max_tokens_per_batch: Character budget per batch.

    Returns:
        A list of batch strings, each with surrounding whitespace stripped.
    """
    # Imported locally because the module's import section does not provide `re`.
    import re

    if isinstance(text, str):
        # Iterating the raw string would yield single characters, not sentences.
        sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    else:
        sentences = list(text)

    batches = []
    current = []
    current_len = 0  # Length of the batch including one space per sentence.
    for sentence in sentences:
        if not sentence:
            continue
        needed = len(sentence) + 1  # Add 1 for the separating space.
        if current and current_len + needed > max_tokens_per_batch:
            batches.append(" ".join(current).strip())
            current = []
            current_len = 0
        current.append(sentence)
        current_len += needed
    if current:
        batches.append(" ".join(current).strip())
    return batches
def run_t2tt(file_uploader, input_text: str, source_language: str, target_language: str) -> tuple:
    """Handle a translation request from the UI (translation itself is a stub).

    When a file is supplied its contents replace ``input_text``. The text is
    then split into batches of at most 256 characters. No translation
    backend is wired in: the text result is always the placeholder
    ``"hello"`` and no output file is produced.

    Args:
        file_uploader: ``None``, a path to a text file, or an object exposing
            the path as ``.name``.
        input_text: Text typed by the user; ignored when a file is supplied.
        source_language: Selected source language (currently unused).
        target_language: Selected target language (currently unused).

    Returns:
        A ``(translated_text, translated_file)`` pair matching the two
        output components bound to this handler; the file slot is ``None``.
    """
    if file_uploader is not None:
        # NOTE(review): presumably Gradio passes either a path string or a
        # temp-file wrapper depending on its version; confirm. Both are accepted.
        path = getattr(file_uploader, "name", file_uploader)
        # Explicit UTF-8 so non-ASCII input does not depend on the locale default.
        with open(path, "r", encoding="utf-8") as file:
            input_text = file.read()

    max_tokens_per_batch = 256
    # TODO: feed `batches` to a translation backend; the result is unused for now.
    batches = split_text_into_batches(input_text, max_tokens_per_batch)

    # Two values are returned because the event binds two output components.
    return "hello", None
# Translation tab: inputs and the trigger button on the left, results on the right.
with gr.Blocks() as demo_t2tt:
    with gr.Row():
        # Left column: optional file upload, free-text input, language pickers.
        with gr.Column():
            with gr.Group():
                file_uploader = gr.File(label="Upload a text file (Optional)")
                input_text = gr.Textbox(label="Input text")
                with gr.Row():
                    # NOTE(review): both dropdowns have empty `choices`, so
                    # their default values are not among the selectable options.
                    source_language = gr.Dropdown(
                        choices=[],
                        value="Punjabi",
                        label="Source language",
                    )
                    target_language = gr.Dropdown(
                        choices=[],
                        value=[],
                        label="Target language",
                    )
            btn = gr.Button("Translate")
        # Right column: translated text and a downloadable file.
        with gr.Column():
            output_text = gr.Textbox(label="Translated text")
            output_file = gr.File(label="Translated text file")

    # Pressing Enter in the textbox and clicking the button run the same handler.
    gr.on(
        fn=run_t2tt,
        triggers=[input_text.submit, btn.click],
        inputs=[file_uploader, input_text, source_language, target_language],
        outputs=[output_text, output_file],
        api_name="t2tt",
    )
# Top-level app: each previously built interface is rendered inside its own tab.
with gr.Blocks() as demo, gr.Tabs():
    with gr.Tab(label="OCR"):
        demo_ocr.render()
    with gr.Tab(label="Translate"):
        demo_t2tt.render()

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()