# Hugging Face Space status at capture time: Runtime error
import os
import re

import gradio as gr
def correct_spell(inputs):
    """Placeholder spell-check hook.

    The argument is ignored; the constant string ``"res"`` is returned for
    every call.
    """
    placeholder_result = "res"
    return placeholder_result
def process_text_in_chunks(text, process_function, max_chunk_size=256):
    """Apply *process_function* to *text* piece by piece and rejoin the results.

    The text is split into sentences at whitespace that follows ``.``, ``!``
    or ``?``. Each sentence is then cut into consecutive slices of at most
    *max_chunk_size* characters, and every slice is passed to
    *process_function*. Processed slices of one sentence are concatenated
    directly; processed sentences are joined with a single space.

    Args:
        text: The string to process.
        process_function: Callable taking one string chunk and returning the
            processed string.
        max_chunk_size: Maximum number of characters per chunk (must be >= 1).

    Returns:
        The processed text with leading/trailing whitespace stripped.

    Raises:
        ValueError: If *max_chunk_size* is smaller than 1.
    """
    # A negative size would make every range() empty and silently discard the
    # whole text; zero would fail with an unrelated range() error message.
    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be a positive integer")

    processed_sentences = []
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        processed_chunks = [
            process_function(sentence[start:start + max_chunk_size])
            for start in range(0, len(sentence), max_chunk_size)
        ]
        processed_sentences.append("".join(processed_chunks))

    return " ".join(processed_sentences).strip()
def _write_ocr_result(text, path="RESULT_OCR.txt"):
    """Write *text* to *path* as UTF-8, close the file, and return *path*."""
    # Explicit UTF-8: the OCR output may contain Hindi/Punjabi characters,
    # which a non-UTF-8 locale default encoding could fail to encode.
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(text)
    return path


def greet(img, apply_grammar_correction, apply_spell_check, lang_of_input):
    """Run OCR on *img* and return the recognised text plus a text-file path.

    For "Hindi" and "Punjabi" the image is passed to ``pt.image_to_string``
    with language code ``hin`` / ``pan`` and the raw result is returned;
    the grammar and spell-check options are not applied on that path.
    For any other language value the image is saved to ``out.jpg``, run
    through ``OCRpredictor``, and the recognised words are assembled into
    text (one newline after each line, one extra newline after each block).
    Optional grammar correction and spell check are then applied in chunks.

    NOTE(review): ``pt``, ``DocumentFile``, ``OCRpredictor``, ``happy_tt`` and
    ``grammar_args`` are not defined or imported in the visible part of this
    file (presumably pytesseract, docTR and a grammar model) - confirm they
    are set up elsewhere, otherwise this function raises ``NameError``.

    Args:
        img: Image to recognise; must be accepted by ``pt.image_to_string``
            and provide ``save()``.
        apply_grammar_correction: If truthy, run the grammar model on the text.
        apply_spell_check: If truthy, run ``correct_spell`` on the text.
        lang_of_input: Language label selected by the user.

    Returns:
        A ``(text, path)`` tuple where *path* is ``"RESULT_OCR.txt"``, the file
        the text was written to.
    """
    tesseract_codes = {"Hindi": "hin", "Punjabi": "pan"}
    tesseract_lang = tesseract_codes.get(lang_of_input)
    if tesseract_lang is not None:
        res = pt.image_to_string(img, lang=tesseract_lang)
        return res, _write_ocr_result(res)

    # The predictor input is built from a file on disk, so persist the image first.
    img.save("out.jpg")
    doc = DocumentFile.from_images("out.jpg")
    output = OCRpredictor(doc)

    pieces = []
    for page in output.pages:
        for block in page.blocks:
            for line in block.lines:
                pieces.extend(" " + word.value for word in line.words)
                pieces.append("\n")  # end of line
            pieces.append("\n")  # blank line between blocks
    res = "".join(pieces)

    # Process in chunks for grammar correction
    if apply_grammar_correction:
        res = process_text_in_chunks(
            res,
            lambda x: happy_tt.generate_text("grammar: " + x, args=grammar_args).text,
        )

    # Process in chunks for spell check
    if apply_spell_check:
        res = process_text_in_chunks(res, correct_spell)

    return res, _write_ocr_result(res)
# OCR tab: image + processing options in, recognised text + downloadable file out.
_ocr_inputs = [
    gr.Image(type="pil"),
    gr.Checkbox(label="Apply Grammar Correction"),
    gr.Checkbox(label="Apply Spell Check"),
    gr.Dropdown(["English", "Hindi", "Punjabi"], label="Select Language"),
]
_ocr_outputs = ["text", "file"]

demo_ocr = gr.Interface(
    fn=greet,
    inputs=_ocr_inputs,
    outputs=_ocr_outputs,
    title="DocTR OCR with Grammar and Spell Check",
    description="Upload an image to get the OCR results. Optionally, apply grammar and spell check.",
)
# Standalone debugging of this tab only: demo_ocr.launch(debug=True)
def split_text_into_batches(text, max_tokens_per_batch):
    """Group the sentences of *text* into batches of bounded length.

    A string is split into sentences at whitespace that follows ``.``, ``!``
    or ``?``; any other iterable is taken to be a sequence of sentences
    already. Sentences are packed greedily, joined by single spaces, so that
    each batch is at most *max_tokens_per_batch* characters long. Despite the
    parameter name, the limit is measured with ``len()``, i.e. in characters.

    A single sentence longer than the limit is kept intact as its own batch
    rather than being cut mid-sentence. Empty sentences are skipped and no
    empty batch is ever produced.

    Args:
        text: The input string, or an iterable of sentence strings.
        max_tokens_per_batch: Maximum batch length in characters.

    Returns:
        A list of batch strings, in input order.
    """
    if isinstance(text, str):
        # Iterating the string directly would yield single characters,
        # so split it into sentences first.
        sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    else:
        sentences = list(text)

    batches = []
    current_parts = []
    current_length = 0
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue
        # One extra character for the joining space, except at batch start.
        added_length = len(sentence) + (1 if current_parts else 0)
        if current_parts and current_length + added_length > max_tokens_per_batch:
            batches.append(" ".join(current_parts))
            current_parts = [sentence]
            current_length = len(sentence)
        else:
            current_parts.append(sentence)
            current_length += added_length

    if current_parts:
        batches.append(" ".join(current_parts))
    return batches
def run_t2tt(file_uploader, input_text: str, source_language: str, target_language: str) -> tuple:
    """Handle a text-to-text translation request (translation itself is a stub).

    If a file was uploaded, its contents replace *input_text*. The text is
    split into batches, but no translation is performed yet: the placeholder
    string ``"hello"`` is returned regardless of the input.

    Args:
        file_uploader: ``None``, a path to a text file, or an object whose
            ``name`` attribute is such a path.
        input_text: Text typed by the user; ignored when a file is given.
        source_language: Selected source language (currently unused).
        target_language: Selected target language (currently unused).

    Returns:
        A ``(translated_text, translated_file)`` pair, one value per output
        component of the event listener that calls this function. The file
        slot is ``None`` because no output file is produced yet.
    """
    if file_uploader is not None:
        # Accept both a plain path and a file wrapper exposing ``.name``.
        path = getattr(file_uploader, "name", file_uploader)
        with open(path, "r", encoding="utf-8") as file:
            input_text = file.read()

    max_tokens_per_batch = 256
    # TODO: translate each batch and join the results; unused for now.
    batches = split_text_into_batches(input_text, max_tokens_per_batch)

    # Two outputs are wired to this handler, so two values must be returned.
    return "hello", None
# Translation tab: inputs and button in the left column, results in the right.
with gr.Blocks() as demo_t2tt:
    with gr.Row():
        with gr.Column():
            with gr.Group():
                file_uploader = gr.File(label="Upload a text file (Optional)")
                input_text = gr.Textbox(label="Input text")
                with gr.Row():
                    # NOTE(review): both dropdowns have empty `choices` while a
                    # `value` is preset - confirm the intended language lists.
                    source_language = gr.Dropdown(label="Source language", choices=[], value="Punjabi")
                    target_language = gr.Dropdown(label="Target language", choices=[], value=[])
            btn = gr.Button("Translate")
        with gr.Column():
            output_text = gr.Textbox(label="Translated text")
            output_file = gr.File(label="Translated text file")

    # Pressing Enter in the text box and clicking the button run the same handler.
    _t2tt_triggers = [input_text.submit, btn.click]
    _t2tt_inputs = [file_uploader, input_text, source_language, target_language]
    _t2tt_outputs = [output_text, output_file]
    gr.on(
        triggers=_t2tt_triggers,
        fn=run_t2tt,
        inputs=_t2tt_inputs,
        outputs=_t2tt_outputs,
        api_name="t2tt",
    )
# Top-level app: one tab per sub-demo.
with gr.Blocks() as demo, gr.Tabs():
    with gr.Tab(label="OCR"):
        demo_ocr.render()
    with gr.Tab(label="Translate"):
        demo_t2tt.render()


if __name__ == "__main__":
    demo.launch()