# Reference:
# https://huggingface.co/spaces/Sagar23p/mistralAI_chatBoat
"""Gradio app: OCR a composition image, then step through suspected mistakes.

The image is converted to text with PaddleOCR; each sentence is then sent to a
hosted Llama-3 chat model which is asked whether it contains a mistake and, if
so, what the correction is.
"""
# NOTE: asyncio, requests and draw_ocr are not used below; the imports are
# retained from the original file.
import asyncio
import logging
import os
import re
from functools import lru_cache

import gradio as gr
import requests
from huggingface_hub import InferenceClient
from paddleocr import PaddleOCR, draw_ocr

logger = logging.getLogger(__name__)

API_TOKEN = os.environ.get('HUGGINGFACE_API_KEY')
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"

# Only build the Authorization header when a token is configured; concatenating
# None would raise TypeError at import time.
if API_TOKEN:
    headers = {"Authorization": "Bearer " + API_TOKEN}
else:
    headers = {}
    logger.warning(
        "HUGGINGFACE_API_KEY is not set; inference requests will be sent "
        "without an Authorization header."
    )

# Patterns used while cleaning OCR output (compiled once, raw strings).
_LOWERCASE_RE = re.compile(r'[a-z]')
_ITEM_NUMBER_RE = re.compile(r'[0-9]\.')
_SPELLING_HEADER_RE = re.compile(r'Term [0-9] Spelling')
_PAGE_HEADER_RE = re.compile(r'Page [0-9]')

# Applied in this exact order, mirroring the original chain of replace() calls.
# NOTE(review): the repeated "()" entries may originally have been full-width
# or otherwise distinct bracket characters lost in transit -- confirm.
_CHINESE_REMOVALS = (" ", "()", "()", "( )", "()")


def query(question):
    """Ask the chat model a question and return its reply text.

    If the first reply contains "Yes", a follow-up question asking for the
    mistake and the corrected sentence is sent in the same conversation and
    that second reply is returned instead.

    Args:
        question: The user prompt to send.

    Returns:
        The content of the last assistant message.
    """
    client = InferenceClient(MODEL_ID, headers=headers)
    messages = [
        {
            "role": "system",
            "content": "You are a helpful and honest assistant. Please, respond concisely and truthfully.",
        },
        {
            "role": "user",
            "content": question,
        },
    ]
    output = client.chat_completion(messages, model=MODEL_ID, max_tokens=1000)
    first_reply = output.choices[0].message['content']
    if 'Yes' in first_reply:
        # Send the assistant turn back as a plain dict rather than the
        # response object so the request payload contains only role/content.
        messages.append({"role": "assistant", "content": first_reply})
        messages.append({"role": "user", "content": "What is the mistake and what is the correct sentence?"})
        output = client.chat_completion(messages, model=MODEL_ID, max_tokens=1000)
    return output.choices[0].message['content']


@lru_cache(maxsize=None)
def _get_ocr(lang):
    """Return a PaddleOCR engine for *lang*, created once and then reused."""
    return PaddleOCR(use_angle_cls=True, lang=lang)


def _clean_chinese(raw):
    """Strip lowercase letters (pinyin), item numbers, spaces and empty brackets."""
    cleaned = _LOWERCASE_RE.sub('', raw)
    cleaned = _ITEM_NUMBER_RE.sub('', cleaned)
    for fragment in _CHINESE_REMOVALS:
        cleaned = cleaned.replace(fragment, "")
    return cleaned


def _clean_english(raw):
    """Strip 'Term N Spelling' and 'Page N' headers from a recognized line."""
    cleaned = _SPELLING_HEADER_RE.sub('', raw)
    return _PAGE_HEADER_RE.sub('', cleaned)


def image2Text(image, langChoice: str):
    """Run OCR on an image and return the recognized text, one sentence per line.

    Args:
        image: The image to recognize, passed straight to ``PaddleOCR.ocr``
            (whatever the ``gr.Image`` component delivers). ``None`` yields
            an empty string.
        langChoice: OCR language code; "ch" enables the Chinese clean-up,
            any other value uses the English clean-up.

    Returns:
        The cleaned text with OCR line breaks replaced by spaces and a newline
        inserted after every ".".
    """
    if image is None:
        return ""

    ocr = _get_ocr(langChoice)
    result = ocr.ocr(image, cls=True)
    clean = _clean_chinese if langChoice == "ch" else _clean_english

    pieces = []
    for page in result or []:
        # A page with no detected text comes back as None; skip it instead of
        # failing on iteration.
        if not page:
            continue
        for line in page:
            if langChoice != "ch":
                logger.debug("OCR line: %s", line)
            cleaned = clean(line[1][0])
            if cleaned != "":
                pieces.append(cleaned)

    text = "".join(piece + "\n" for piece in pieces)
    return text.replace("\n", " ").replace(".", ".\n")


def _scan_for_mistake(recognized_text, current_line, session_data, step):
    """Walk the text line by line in direction *step* until the model flags a line.

    Args:
        recognized_text: Full text; lines are separated by "\\n".
        current_line: The line currently shown to the user.
        session_data: ``[]`` when no position is stored, else ``[index]`` of
            the line last reported.
        step: +1 to scan toward the end of the text, -1 toward the start.

    Returns:
        ``(current_line, message, session_data)`` for the three UI outputs.
    """
    if not recognized_text or not recognized_text.strip():
        return current_line, "No mistake. Empty text.", session_data

    lines = recognized_text.split("\n")
    if session_data:
        index = session_data[0] + step
    elif step > 0:
        index = 0
    else:
        index = len(lines) - 1
    if step < 0:
        # The text may have been shortened since the position was stored.
        index = min(index, len(lines) - 1)

    while 0 <= index < len(lines):
        candidate = lines[index]
        # Blank lines cannot contain a mistake; do not spend a model call on them.
        if candidate.strip():
            current_line = candidate
            question = f"Only answer Yes or No. Is there grammatical or logical mistake in the sentence: {current_line}"
            correction_text = query(question)
            if not correction_text.startswith("No"):
                return current_line, correction_text, [index]
        index += step

    # Ran off the text: clear the stored position so the next search restarts.
    if step > 0:
        return current_line, "No more mistake. End of text", []
    return current_line, "No more mistake. Start of text", []


def text2PrevMistake(recognized_text, langChoice: str, current_line, session_data):
    """Find the closest flagged line before the stored position.

    Scans backward from the line before ``session_data[0]`` (or from the last
    line when no position is stored). ``langChoice`` is accepted for a uniform
    event signature and is not used.

    Returns:
        ``(current_line, correction_text, session_data)``.
    """
    return _scan_for_mistake(recognized_text, current_line, session_data, -1)


def text2NextMistake(recognized_text, langChoice: str, current_line, session_data):
    """Find the closest flagged line after the stored position.

    Scans forward from the line after ``session_data[0]`` (or from the first
    line when no position is stored). ``langChoice`` is accepted for a uniform
    event signature and is not used.

    Returns:
        ``(current_line, correction_text, session_data)``.
    """
    return _scan_for_mistake(recognized_text, current_line, session_data, 1)


# NOTE(review): the original indentation was lost, so the nesting of the layout
# containers below is a reconstruction -- confirm against the intended design.
with gr.Blocks() as demo:
    gr.HTML("""

Composition Corrector

""")
    # Holds [] or [index of the line last reported] between button clicks.
    session_data = gr.State([])
    with gr.Row():
        with gr.Column(scale=1):
            upload_image = gr.Image(height=400, width=400, value="compo.jpg")
            langChoice = gr.Radio(["en", "ch"], value="en", label="Select lanaguage: 'ch' for Chinese, 'en' for English", info="")
        with gr.Column(scale=3):
            recognized_text = gr.Textbox(show_label=False, placeholder="composition", lines=15)
            toText = gr.Button("Convert image to text")
            current_line = gr.Textbox(show_label=False, placeholder="current line", lines=1)
            correction_text = gr.Textbox(show_label=False, placeholder="corrections...", lines=15)
    with gr.Row():
        with gr.Column(scale=1):
            toPrevMistake = gr.Button("Find prev mistake", variant="primary")
        with gr.Column(scale=1):
            toNextMistake = gr.Button("Find next mistake", variant="primary")

    toText.click(
        image2Text,
        [upload_image, langChoice],
        [recognized_text],
    )
    toNextMistake.click(
        text2NextMistake,
        [recognized_text, langChoice, current_line, session_data],
        [current_line, correction_text, session_data],
    )
    toPrevMistake.click(
        text2PrevMistake,
        [recognized_text, langChoice, current_line, session_data],
        [current_line, correction_text, session_data],
    )

# Launch only when run as a script so importing the module has no side effect.
if __name__ == "__main__":
    demo.queue().launch(share=False, inbrowser=True)