import re

import gradio as gr
import pytesseract as pt
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from happytransformer import HappyTextToText, TTSettings

# Model setup. The original snippet used these names without defining them;
# the checkpoints below are assumptions chosen to match the calls made later
# (docTR's pretrained OCR pipeline, and a T5 grammar model driven with
# "grammar: "-prefixed prompts). Swap in your own checkpoints as needed.
OCRpredictor = ocr_predictor(pretrained=True)
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
grammar_args = TTSettings(num_beams=5, min_length=1)


def correct_spell(inputs):
    # Placeholder: returns the text unchanged. (The original stub returned the
    # literal string "res", which would overwrite every chunk.) See
    # correct_spell_textblob below for a working sketch.
    return inputs


def process_text_in_chunks(text, process_function, max_chunk_size=256):
    # Split text into sentences
    sentences = re.split(r'(?<=[.!?])\s+', text)
    processed_text = ""
    for sentence in sentences:
        # Further split long sentences into smaller chunks
        chunks = [sentence[i:i + max_chunk_size]
                  for i in range(0, len(sentence), max_chunk_size)]
        for chunk in chunks:
            processed_text += process_function(chunk)
        processed_text += " "  # Add space after each processed sentence
    return processed_text.strip()


def greet(img, apply_grammar_correction, apply_spell_check, lang_of_input):
    # Hindi and Punjabi go through Tesseract; everything else through docTR.
    if lang_of_input == "Hindi":
        res = pt.image_to_string(img, lang='hin')
        _output_name = "RESULT_OCR.txt"
        with open(_output_name, 'w', encoding='utf-8') as f:
            f.write(res)
        return res, _output_name
    if lang_of_input == "Punjabi":
        res = pt.image_to_string(img, lang='pan')
        _output_name = "RESULT_OCR.txt"
        with open(_output_name, 'w', encoding='utf-8') as f:
            f.write(res)
        return res, _output_name

    img.save("out.jpg")
    doc = DocumentFile.from_images("out.jpg")
    output = OCRpredictor(doc)

    # Reassemble the recognised words line by line, block by block
    res = ""
    for page in output.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    res += " " + word.value
                res += "\n"
            res += "\n"

    # Process in chunks for grammar correction
    if apply_grammar_correction:
        res = process_text_in_chunks(
            res,
            lambda x: happy_tt.generate_text("grammar: " + x, args=grammar_args).text,
        )

    # Process in chunks for spell check
    if apply_spell_check:
        res = process_text_in_chunks(res, correct_spell)

    _output_name = "RESULT_OCR.txt"
    with open(_output_name, 'w', encoding='utf-8') as f:
        f.write(res)
    return res, _output_name


# Gradio Interface for OCR
demo_ocr = gr.Interface(
    fn=greet,
    inputs=[
        gr.Image(type="pil"),
        gr.Checkbox(label="Apply Grammar Correction"),
        gr.Checkbox(label="Apply Spell Check"),
        gr.Dropdown(["English", "Hindi", "Punjabi"], label="Select Language"),
    ],
    outputs=["text", "file"],
    title="DocTR OCR with Grammar and Spell Check",
    description="Upload an image to get the OCR results. Optionally, apply grammar and spell check.",
)
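
# Optional spell-check backend: correct_spell above just passes text through.
# The function below is a minimal working sketch, assuming the third-party
# `textblob` package (not part of the original code). Pass it to
# process_text_in_chunks in place of correct_spell to get real corrections,
# e.g. process_text_in_chunks(res, correct_spell_textblob).
def correct_spell_textblob(inputs):
    from textblob import TextBlob  # lazy import so the app still runs without textblob installed
    # TextBlob.correct() applies a simple frequency-based spelling corrector
    return str(TextBlob(inputs).correct())
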
# demo_ocr.launch(debug=True)


def split_text_into_batches(text, max_tokens_per_batch):
    # Tokenize text into sentences. (The original assigned `sentences = text`,
    # which would iterate over individual characters, not sentences.)
    sentences = re.split(r'(?<=[.!?])\s+', text)
    batches = []
    current_batch = ""
    for sentence in sentences:
        if len(current_batch) + len(sentence) + 1 <= max_tokens_per_batch:  # Add 1 for space
            current_batch += sentence + " "  # Add sentence to current batch
        else:
            batches.append(current_batch.strip())  # Add current batch to batches list
            current_batch = sentence + " "  # Start a new batch with the current sentence
    if current_batch:
        batches.append(current_batch.strip())  # Add the last batch
    return batches


def run_t2tt(file_uploader, input_text: str, source_language: str, target_language: str) -> tuple[str, str]:
    if file_uploader is not None:
        with open(file_uploader, 'r', encoding='utf-8') as file:
            input_text = file.read()
    max_tokens_per_batch = 256
    batches = split_text_into_batches(input_text, max_tokens_per_batch)
    # Placeholder: the original returned the literal string "hello" and never
    # called a translation model. Each batch is passed through unchanged here;
    # the commented NLLB sketch at the end of the file shows one way to plug
    # in a real model per batch.
    translated_text = " ".join(batches)
    _output_name = "translated_text.txt"
    with open(_output_name, 'w', encoding='utf-8') as f:
        f.write(translated_text)
    return translated_text, _output_name


# The original dropdowns had empty `choices` (and a list as the target's
# default value); the OCR tab's three languages are reused here as a stand-in.
LANGUAGE_CHOICES = ["English", "Hindi", "Punjabi"]

with gr.Blocks() as demo_t2tt:
    with gr.Row():
        with gr.Column():
            with gr.Group():
                file_uploader = gr.File(label="Upload a text file (Optional)")
                input_text = gr.Textbox(label="Input text")
                with gr.Row():
                    source_language = gr.Dropdown(
                        label="Source language",
                        choices=LANGUAGE_CHOICES,
                        value="Punjabi",
                    )
                    target_language = gr.Dropdown(
                        label="Target language",
                        choices=LANGUAGE_CHOICES,
                        value="English",
                    )
                btn = gr.Button("Translate")
        with gr.Column():
            output_text = gr.Textbox(label="Translated text")
            output_file = gr.File(label="Translated text file")

    gr.on(
        triggers=[input_text.submit, btn.click],
        fn=run_t2tt,
        inputs=[file_uploader, input_text, source_language, target_language],
        outputs=[output_text, output_file],
        api_name="t2tt",
    )

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab(label="OCR"):
            demo_ocr.render()
        with gr.Tab(label="Translate"):
            demo_t2tt.render()

if __name__ == "__main__":
    demo.launch()
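
# ---------------------------------------------------------------------------
# Sketch: plugging a real translation model into run_t2tt. This is one
# possible approach, not the original author's implementation: it assumes the
# `transformers` package and the public NLLB checkpoint
# "facebook/nllb-200-distilled-600M"; the FLORES-200 codes below cover this
# app's three languages. To use it, move this above run_t2tt and replace the
# passthrough with translate_batch(batch, source_language, target_language)
# for each batch. (For repeated calls, build the pipeline once and reuse it
# rather than constructing it per batch.)
#
# from transformers import pipeline
#
# _NLLB_CODES = {"English": "eng_Latn", "Hindi": "hin_Deva", "Punjabi": "pan_Guru"}
#
# def translate_batch(text, source_language, target_language):
#     translator = pipeline(
#         "translation",
#         model="facebook/nllb-200-distilled-600M",
#         src_lang=_NLLB_CODES[source_language],
#         tgt_lang=_NLLB_CODES[target_language],
#     )
#     return translator(text)[0]["translation_text"]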