Image-to-text-OCR

Runtime error

App Files Files Community

omkar56 committed on Dec 9, 2023

Commit

9a6d49e

•

1 Parent(s): 5d94527

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -19

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 # OCR Translate v0.2
-# 创建人：曾逸夫
-# 创建时间：2022-07-19
 import os
@@ -12,32 +11,91 @@ import pyclip
 import pytesseract
 from nltk.tokenize import sent_tokenize
 from transformers import MarianMTModel, MarianTokenizer
 nltk.download('punkt')
 OCR_TR_DESCRIPTION = '''# OCR Translate v0.2
 <div id="content_align">OCR translation system based on Tesseract</div>'''
-# 图片路径
 img_dir = "./data"
-# 获取tesseract语言列表
 choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
-# 翻译模型选择
 def model_choice(src="en", trg="zh"):
     # https://huggingface.co/Helsinki-NLP/opus-mt-zh-en
     # https://huggingface.co/Helsinki-NLP/opus-mt-en-zh
-    model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"  # 模型名称
-    tokenizer = MarianTokenizer.from_pretrained(model_name)  # 分词器
-    model = MarianMTModel.from_pretrained(model_name)  # 模型
     return tokenizer, model
-# tesseract语言列表转pytesseract语言
 def ocr_lang(lang_list):
     lang_str = ""
     lang_len = len(lang_list)
@@ -57,12 +115,12 @@ def ocr_tesseract(img, languages):
     return ocr_str
-# 清除
 def clear_content():
     return None
-# 复制到剪贴板
 def cp_text(input_text):
     # sudo apt-get install xclip
     try:
@@ -72,18 +130,18 @@ def cp_text(input_text):
         print(e)
-# 清除剪贴板
 def cp_clear():
     pyclip.clear()
-# 翻译
 def translate(input_text, inputs_transStyle):
-    # 参考：https://huggingface.co/docs/transformers/model_doc/marian
     if input_text is None or input_text == "":
         return "System prompt: There is no content to translate!"
-    # 选择翻译模型
     trans_src, trans_trg = inputs_transStyle.split("-")[0], inputs_transStyle.split("-")[1]
     tokenizer, model = model_choice(trans_src, trans_trg)
@@ -110,7 +168,7 @@ def main():
     with gr.Blocks(css='style.css') as ocr_tr:
         gr.Markdown(OCR_TR_DESCRIPTION)
-        # -------------- OCR 文字提取 --------------
         with gr.Box():
             with gr.Row():
@@ -147,7 +205,7 @@ def main():
                                 ["./data/test03.png", ["chi_sim"]]]
                 gr.Examples(example_list, [inputs_img, inputs_lang], outputs_text, ocr_tesseract, cache_examples=False)
-        # -------------- 翻译 --------------
         with gr.Box():
             with gr.Row():
@@ -165,11 +223,11 @@ def main():
             outputs_text,])
         clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])
-        # ---------------------- 翻译 ----------------------
         translate_btn.click(fn=translate, inputs=[outputs_text, inputs_transStyle], outputs=[outputs_tr_text])
         clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])
-        # ---------------------- 复制到剪贴板 ----------------------
         cp_btn.click(fn=cp_text, inputs=[outputs_tr_text], outputs=[])
         cp_clear_btn.click(fn=cp_clear, inputs=[], outputs=[])

 # OCR Translate v0.2
 import os
 import pytesseract
 from nltk.tokenize import sent_tokenize
 from transformers import MarianMTModel, MarianTokenizer
+# Newly added below
+import secrets
+import tempfile
+from typing import Optional
+
+from fastapi import FastAPI, File, UploadFile, Body, Security, Depends, HTTPException
+from fastapi.security.api_key import APIKeyHeader
+from fastapi.encoders import jsonable_encoder
+API_KEY = os.environ.get("API_KEY")
+app = FastAPI()
+api_key_header = APIKeyHeader(name="api_key", auto_error=False)
+def get_api_key(api_key: Optional[str] = Depends(security)):
+    if api_key is None or api_key != API_KEY:
+        raise HTTPException(status_code=401, detail="Unauthorized access")
+    return api_key
+@app.post("/ocr", response_model=dict)
+async def ocr(
+    api_key: str = Depends(get_api_key),
+    image: UploadFile = File(...),
+    languages: list = Body(["eng"])
+):
+    # if api_key != API_KEY:
+    #     return {"error": "Invalid API key"}, 401
+    try:
+        text = image_to_string(await image.read(), lang="+".join(languages))
+    except Exception as e:
+        return {"error": str(e)}, 500
+    return jsonable_encoder({"text": text})
+@app.post("/translate", response_model=dict)
+async def translate(
+    api_key: str = Depends(get_api_key),
+    text: str = Body(...),
+    src: str = "en",
+    trg: str = "zh",
+):
+    # if api_key != API_KEY:
+    #     return {"error": "Invalid API key"}, 401
+    tokenizer, model = get_model(src, trg)
+    translated_text = ""
+    for sentence in sent_tokenize(text):
+        translated_sub = model.generate(**tokenizer(sentence, return_tensors="pt"))[0]
+        translated_text += tokenizer.decode(translated_sub, skip_special_tokens=True) + "\n"
+    return jsonable_encoder({"translated_text": translated_text})
+def get_model(src: str, trg: str):
+    model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
+    tokenizer = MarianTokenizer.from_pretrained(model_name)
+    model = MarianMTModel.from_pretrained(model_name)
+    return tokenizer, model
+# ===============================================
 nltk.download('punkt')
 OCR_TR_DESCRIPTION = '''# OCR Translate v0.2
 <div id="content_align">OCR translation system based on Tesseract</div>'''
+# Image path
 img_dir = "./data"
+# Get tesseract language list
 choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
+# Translation model selection
 def model_choice(src="en", trg="zh"):
+    """Load the MarianMT tokenizer and model for a language pair.
+
+    Args:
+        src: Source language code inserted into the checkpoint name.
+        trg: Target language code inserted into the checkpoint name.
+
+    Returns:
+        A ``(tokenizer, model)`` tuple loaded with ``from_pretrained``.
+    """
+    # Example checkpoints:
     # https://huggingface.co/Helsinki-NLP/opus-mt-zh-en
     # https://huggingface.co/Helsinki-NLP/opus-mt-en-zh
+    model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"  # Checkpoint name for the pair
+    tokenizer = MarianTokenizer.from_pretrained(model_name)  # Tokenizer for the checkpoint
+    model = MarianMTModel.from_pretrained(model_name)  # Translation model for the checkpoint
     return tokenizer, model
+# Convert tesseract language list to pytesseract language
 def ocr_lang(lang_list):
     lang_str = ""
     lang_len = len(lang_list)
     return ocr_str
+# Clear
 def clear_content():
+    """Return ``None``; wired to the clear buttons as the value for their output component."""
     return None
+# copy to clipboard
 def cp_text(input_text):
     # sudo apt-get install xclip
     try:
         print(e)
+# clear clipboard
 def cp_clear():
+    """Clear the clipboard by calling ``pyclip.clear()``."""
     pyclip.clear()
+# translate
 def translate(input_text, inputs_transStyle):
+    # Reference: https://huggingface.co/docs/transformers/model_doc/marian
     if input_text is None or input_text == "":
         return "System prompt: There is no content to translate!"
+    # Select translation model
     trans_src, trans_trg = inputs_transStyle.split("-")[0], inputs_transStyle.split("-")[1]
     tokenizer, model = model_choice(trans_src, trans_trg)
     with gr.Blocks(css='style.css') as ocr_tr:
         gr.Markdown(OCR_TR_DESCRIPTION)
+        # -------------- OCR text extraction --------------
         with gr.Box():
             with gr.Row():
                                 ["./data/test03.png", ["chi_sim"]]]
                 gr.Examples(example_list, [inputs_img, inputs_lang], outputs_text, ocr_tesseract, cache_examples=False)
+        # -------------- translate --------------
         with gr.Box():
             with gr.Row():
             outputs_text,])
         clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])
+        # ---------------------- translate ----------------------
         translate_btn.click(fn=translate, inputs=[outputs_text, inputs_transStyle], outputs=[outputs_tr_text])
         clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])
+        # ---------------------- copy to clipboard ----------------------
         cp_btn.click(fn=cp_text, inputs=[outputs_tr_text], outputs=[])
         cp_clear_btn.click(fn=cp_clear, inputs=[], outputs=[])