omkar56 committed on
Commit
e4dd4df
1 Parent(s): 8c811eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -60
app.py CHANGED
@@ -11,66 +11,6 @@ import pyclip
11
  import pytesseract
12
  from nltk.tokenize import sent_tokenize
13
  from transformers import MarianMTModel, MarianTokenizer
14
- # Newly added below
15
- from fastapi import FastAPI, File, UploadFile, Body, Depends, HTTPException
16
- from fastapi.security.api_key import APIKeyHeader
17
- from typing import Optional
18
- from fastapi.encoders import jsonable_encoder
19
-
20
- API_KEY = os.environ.get("API_KEY")
21
-
22
- app = FastAPI()
23
- api_key_header = APIKeyHeader(name="api_key", auto_error=False)
24
-
25
- def get_api_key(api_key: Optional[str] = Depends(api_key_header)):
26
- if api_key is None or api_key != API_KEY:
27
- raise HTTPException(status_code=401, detail="Unauthorized access")
28
- return api_key
29
-
30
- @app.post("/ocr", response_model=dict)
31
- async def ocr(
32
- api_key: str = Depends(get_api_key),
33
- image: UploadFile = File(...),
34
- languages: list = Body(["eng"])
35
- ):
36
- # if api_key != API_KEY:
37
- # return {"error": "Invalid API key"}, 401
38
-
39
- try:
40
- text = image_to_string(await image.read(), lang="+".join(languages))
41
- except Exception as e:
42
- return {"error": str(e)}, 500
43
-
44
- return jsonable_encoder({"text": text})
45
-
46
-
47
- @app.post("/translate", response_model=dict)
48
- async def translate(
49
- api_key: str = Depends(get_api_key),
50
- text: str = Body(...),
51
- src: str = "en",
52
- trg: str = "zh",
53
- ):
54
- # if api_key != API_KEY:
55
- # return {"error": "Invalid API key"}, 401
56
-
57
- tokenizer, model = get_model(src, trg)
58
-
59
- translated_text = ""
60
- for sentence in sent_tokenize(text):
61
- translated_sub = model.generate(**tokenizer(sentence, return_tensors="pt"))[0]
62
- translated_text += tokenizer.decode(translated_sub, skip_special_tokens=True) + "\n"
63
-
64
- return jsonable_encoder({"translated_text": translated_text})
65
-
66
-
67
- def get_model(src: str, trg: str):
68
- model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
69
- tokenizer = MarianTokenizer.from_pretrained(model_name)
70
- model = MarianMTModel.from_pretrained(model_name)
71
- return tokenizer, model
72
-
73
- # ===============================================
74
 
75
  nltk.download('punkt')
76
 
@@ -113,6 +53,7 @@ def ocr_lang(lang_list):
113
  # ocr tesseract
114
  def ocr_tesseract(img, languages):
115
  print("[img]", img)
 
116
  ocr_str = pytesseract.image_to_string(img, lang=ocr_lang(languages))
117
  return ocr_str
118
 
 
11
  import pytesseract
12
  from nltk.tokenize import sent_tokenize
13
  from transformers import MarianMTModel, MarianTokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  nltk.download('punkt')
16
 
 
53
  # ocr tesseract
54
  def ocr_tesseract(img, languages):
55
  print("[img]", img)
56
+ print("[languages]", languages)
57
  ocr_str = pytesseract.image_to_string(img, lang=ocr_lang(languages))
58
  return ocr_str
59