Spaces:

wddw12332e
/

test_gradio

Runtime error

App Files Files Community

test_gradio / app.py

wddw12332e

Update app.py

740b8c2 verified 6 months ago

raw

history blame contribute delete

No virus

4.8 kB

	import os
	import re

	import gradio as gr

	def correct_spell(inputs):
	    """Placeholder spell checker.

	    The argument is ignored and the literal string ``"res"`` is returned
	    for every call.

	    NOTE(review): this looks like a stub left in place of a real spell
	    checker -- confirm before relying on the "Apply Spell Check" option.
	    """
	    placeholder_result = "res"
	    return placeholder_result

	def process_text_in_chunks(text, process_function, max_chunk_size=256):
	    """Apply ``process_function`` to ``text`` one bounded chunk at a time.

	    The text is split into sentences after ``.``, ``!`` or ``?`` followed by
	    whitespace. Each sentence is cut into pieces of at most
	    ``max_chunk_size`` characters, every piece is passed through
	    ``process_function``, and the processed pieces of one sentence are
	    concatenated without a separator. Processed sentences are joined with a
	    single space and the result is stripped of surrounding whitespace.

	    Args:
	        text: The input string.
	        process_function: Callable taking a ``str`` chunk and returning a
	            ``str``.
	        max_chunk_size: Maximum number of characters per chunk; must be
	            at least 1.

	    Returns:
	        The processed text.

	    Raises:
	        ValueError: If ``max_chunk_size`` is smaller than 1. A negative size
	            would otherwise produce no chunks and silently discard the text.
	    """
	    if max_chunk_size < 1:
	        raise ValueError("max_chunk_size must be a positive integer")

	    # Split text into sentences
	    sentences = re.split(r'(?<=[.!?])\s+', text)

	    processed_sentences = []
	    for sentence in sentences:
	        # Further split long sentences into smaller chunks
	        processed_chunks = [
	            process_function(sentence[start:start + max_chunk_size])
	            for start in range(0, len(sentence), max_chunk_size)
	        ]
	        processed_sentences.append("".join(processed_chunks))

	    return " ".join(processed_sentences).strip()

	def _write_ocr_result(text, output_name="RESULT_OCR.txt"):
	    """Write ``text`` to ``output_name`` as UTF-8 and return the file name."""
	    # Explicit UTF-8 so Hindi/Punjabi text does not depend on the platform's
	    # default encoding; the context manager guarantees the file is closed.
	    with open(output_name, 'w', encoding="utf-8") as out_file:
	        out_file.write(text)
	    return output_name


	def greet(img, apply_grammar_correction, apply_spell_check,lang_of_input):
	    """Run OCR on an image and save the recognised text to a file.

	    Hindi and Punjabi input is passed to ``pt.image_to_string`` with the
	    matching language code and returned without any correction step. Any
	    other selection saves the image as ``out.jpg``, runs ``OCRpredictor`` on
	    it and optionally applies grammar correction and spell check in chunks.

	    NOTE(review): ``pt``, ``DocumentFile``, ``OCRpredictor``, ``happy_tt`` and
	    ``grammar_args`` are not defined or imported in the visible part of this
	    file; unless they are provided elsewhere, every path through this
	    function raises ``NameError``. Confirm the missing imports/initialisation.

	    Args:
	        img: The uploaded image (the interface requests a PIL image).
	        apply_grammar_correction: Whether to run the grammar model on the
	            recognised text.
	        apply_spell_check: Whether to run ``correct_spell`` on the text.
	        lang_of_input: Language name selected in the UI.

	    Returns:
	        A ``(text, file_name)`` pair: the recognised text and the name of the
	        file it was written to.
	    """
	    tesseract_lang = {"Hindi": 'hin', "Punjabi": 'pan'}.get(lang_of_input)
	    if tesseract_lang is not None:
	        res = pt.image_to_string(img, lang=tesseract_lang)
	        return res, _write_ocr_result(res)

	    img.save("out.jpg")
	    doc = DocumentFile.from_images("out.jpg")
	    output = OCRpredictor(doc)

	    # Every word is prefixed with a space, every line ends with a newline and
	    # every block is followed by one extra newline.
	    block_texts = []
	    for page in output.pages:
	        for block in page.blocks:
	            line_texts = [
	                "".join(" " + word.value for word in line.words) + "\n"
	                for line in block.lines
	            ]
	            block_texts.append("".join(line_texts) + "\n")
	    res = "".join(block_texts)

	    # Process in chunks for grammar correction
	    if apply_grammar_correction:
	        res = process_text_in_chunks(
	            res,
	            lambda x: happy_tt.generate_text("grammar: " + x, args=grammar_args).text,
	        )

	    # Process in chunks for spell check
	    if apply_spell_check:
	        res = process_text_in_chunks(res, correct_spell)

	    return res, _write_ocr_result(res)

	# OCR tab: an image plus three options go in; the recognised text and the
	# saved result file come out. The input order matches greet()'s parameters.
	_ocr_inputs = [
	    gr.Image(type="pil"),
	    gr.Checkbox(label="Apply Grammar Correction"),
	    gr.Checkbox(label="Apply Spell Check"),
	    gr.Dropdown(["English","Hindi","Punjabi"],label="Select Language"),
	]
	demo_ocr = gr.Interface(
	    title="DocTR OCR with Grammar and Spell Check",
	    description="Upload an image to get the OCR results. Optionally, apply grammar and spell check.",
	    fn=greet,
	    inputs=_ocr_inputs,
	    outputs=["text", "file"],
	)


	# demo_ocr.launch(debug=True)

	def split_text_into_batches(text, max_tokens_per_batch):
	    """Group the sentences of ``text`` into batches of bounded length.

	    Sentences are appended to the current batch, each followed by a space,
	    while the batch length plus the sentence length plus one stays within
	    ``max_tokens_per_batch``; otherwise a new batch is started. Despite the
	    parameter name, lengths are measured in characters.

	    Args:
	        text: Either a string, which is split into sentences after ``.``,
	            ``!`` or ``?`` followed by whitespace, or an iterable of
	            sentence strings, which is used as given. Iterating a plain
	            string directly would treat every character as a sentence.
	        max_tokens_per_batch: Maximum batch length in characters. A single
	            sentence longer than this still becomes its own batch.

	    Returns:
	        A list of batch strings without surrounding whitespace. Empty
	        sentences and empty batches are omitted.
	    """
	    if isinstance(text, str):
	        sentences = re.split(r'(?<=[.!?])\s+', text.strip())
	    else:
	        sentences = text

	    batches = []
	    current_batch = ""
	    for sentence in sentences:
	        if not sentence:
	            continue
	        if len(current_batch) + len(sentence) + 1 <= max_tokens_per_batch:  # Add 1 for space
	            current_batch += sentence + " "
	            continue
	        # The sentence does not fit: flush the current batch (skipped when it
	        # is still empty, i.e. the very first sentence is oversized).
	        if current_batch.strip():
	            batches.append(current_batch.strip())
	        current_batch = sentence + " "

	    if current_batch.strip():
	        batches.append(current_batch.strip())  # Add the last batch
	    return batches


	def run_t2tt(file_uploader, input_text: str, source_language: str, target_language: str) -> "tuple[str, str | None]":
	    """Handle a text-to-text translation request from the UI.

	    When a file is supplied its content replaces ``input_text``. Translation
	    itself is not implemented: the function always answers with the
	    placeholder text ``"hello"`` and no output file.

	    Args:
	        file_uploader: Path of an uploaded text file, or ``None``.
	        input_text: Text typed into the input box.
	        source_language: Selected source language (currently unused).
	        target_language: Selected target language (currently unused).

	    Returns:
	        A ``(translated_text, translated_file)`` pair, one value per output
	        component wired to this handler. The file entry is ``None`` because
	        no file is produced.
	    """
	    if file_uploader is not None:
	        # Uploaded text may be Hindi/Punjabi; do not rely on the platform's
	        # default encoding when reading it.
	        with open(file_uploader, 'r', encoding="utf-8") as file:
	            input_text = file.read()

	    # TODO: translate ``input_text`` (e.g. batch it with
	    # split_text_into_batches first) instead of returning a placeholder.
	    return "hello", None

	# Translation tab: optional text file or typed text plus a language pair on
	# the left, translated text and file on the right.
	with gr.Blocks() as demo_t2tt:
	    # The dropdowns previously had no choices, so their default values could
	    # not be valid selections. The list mirrors the languages offered by the
	    # OCR language dropdown.
	    # NOTE(review): confirm the languages the translator should support and
	    # the intended default target language.
	    translation_languages = ["English", "Hindi", "Punjabi"]

	    with gr.Row():
	        with gr.Column():
	            with gr.Group():
	                file_uploader = gr.File(label="Upload a text file (Optional)")
	                input_text = gr.Textbox(label="Input text")
	                with gr.Row():
	                    source_language = gr.Dropdown(
	                        label="Source language",
	                        choices=translation_languages,
	                        value="Punjabi",
	                    )
	                    target_language = gr.Dropdown(
	                        label="Target language",
	                        choices=translation_languages,
	                        value="English",
	                    )
	            btn = gr.Button("Translate")
	        with gr.Column():
	            output_text = gr.Textbox(label="Translated text")
	            output_file = gr.File(label="Translated text file")

	    # Pressing Enter in the text box and clicking the button both run the
	    # translation handler.
	    gr.on(
	        triggers=[input_text.submit, btn.click],
	        fn=run_t2tt,
	        inputs=[file_uploader, input_text, source_language, target_language],
	        outputs=[output_text, output_file],
	        api_name="t2tt",
	    )

	# Top-level app: one tab per previously built sub-application.
	with gr.Blocks() as demo, gr.Tabs():
	    with gr.Tab(label="OCR"):
	        demo_ocr.render()
	    with gr.Tab(label="Translate"):
	        demo_t2tt.render()

	# Start the server only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()