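"""DocTR OCR demo with optional grammar and spell correction.

Pipeline: uploaded image -> docTR OCR -> optional T5 grammar correction
-> optional T5 spell check -> text output plus a downloadable .txt file.
"""
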
import re

import gradio as gr
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from happytransformer import HappyTextToText, TTSettings
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# OCR Predictor initialization
predictor = ocr_predictor(det_arch='db_mobilenet_v3_large', reco_arch='crnn_vgg16_bn', pretrained=True)
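# pretrained=True fetches the detection/recognition weights on first run (cached afterwards)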

# Grammar Correction Model initialization
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
grammar_args = TTSettings(num_beams=5, min_length=1)
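# This model expects inputs prefixed with "grammar: " (added in greet below)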

# Spell Check Model initialization
tokenizer = AutoTokenizer.from_pretrained("Bhuvana/t5-base-spellchecker", use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained("Bhuvana/t5-base-spellchecker")

def correct_spell(inputs):
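    """Run the spell-check model over one chunk of text.

    do_sample=True with top_p sampling makes the output nondeterministic;
    repeated calls on the same input can return different corrections.
    """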
    input_ids = tokenizer.encode(inputs, return_tensors='pt')
    sample_output = model.generate(
        input_ids,
        do_sample=True,
        max_length=512,
        top_p=0.99,
        num_return_sequences=1
    )
    res = tokenizer.decode(sample_output[0], skip_special_tokens=True)
    return res

def process_text_in_chunks(text, process_function, max_chunk_size=256):
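    """Apply process_function to text one sentence at a time.

    Sentences longer than max_chunk_size characters are split further so
    each model call stays within its input-length limit. Note the split is
    by character count, so an over-long sentence may be cut mid-word.
    """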
    # Split text into sentences
    sentences = re.split(r'(?<=[.!?])\s+', text)
    processed_text = ""

    for sentence in sentences:
        # Further split long sentences into smaller chunks
        chunks = [sentence[i:i + max_chunk_size] for i in range(0, len(sentence), max_chunk_size)]
        for chunk in chunks:
            processed_text += process_function(chunk)
        processed_text += " "  # Add space after each processed sentence

    return processed_text.strip()

def greet(img, apply_grammar_correction, apply_spell_check):
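    """OCR the uploaded image, optionally post-process, and return the text.

    Returns the (possibly corrected) text and the path to a .txt file with
    the same content for download.
    """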
    if img is None:
        return "No image provided.", None
    # JPEG has no alpha channel, so flatten e.g. RGBA PNG uploads first
    img.convert("RGB").save("out.jpg")
    doc = DocumentFile.from_images("out.jpg")
    output = predictor(doc)

    res = ""
    for obj in output.pages:
        for obj1 in obj.blocks:
            for obj2 in obj1.lines:
                for obj3 in obj2.words:
                    res += " " + obj3.value
            res += "\n"
        res += "\n"

    # Process in chunks for grammar correction
    if apply_grammar_correction:
        res = process_text_in_chunks(res, lambda x: happy_tt.generate_text("grammar: " + x, args=grammar_args).text)

    # Process in chunks for spell check
    if apply_spell_check:
        res = process_text_in_chunks(res, correct_spell)

    _output_name = "RESULT_OCR.txt"
    with open(_output_name, "w", encoding="utf-8") as f:
        f.write(res)
    return res, _output_name

# Gradio Interface
title = "DocTR OCR with Grammar and Spell Check"
description = "Upload an image to get the OCR results. Optionally, apply grammar and spell check."

demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Image(type="pil"),
        gr.Checkbox(label="Apply Grammar Correction"),
        gr.Checkbox(label="Apply Spell Check")
    ],
    outputs=["text", "file"],
    title=title,
    description=description,
    # Each example must supply a value for every input component
    examples=[
        ["Examples/Book.png", False, False],
        ["Examples/News.png", False, False],
        ["Examples/Manuscript.jpg", False, False],
        ["Examples/Files.jpg", False, False]
    ]
)

demo.launch(debug=True)