Spaces:

Angstormy
/

hindi-ocr-api

Running

App Files Community

Angstormy committed 11 days ago

Commit

d8ac28c

verified ·

1 Parent(s): bf41c57

Upload api.py with huggingface_hub

Browse files

Files changed (1) hide show

api.py +17 -2

api.py CHANGED Viewed

@@ -15,6 +15,7 @@ import math
 import contextlib
 import requests
 import unicodedata
 from huggingface_hub import login
 app = FastAPI()
@@ -207,7 +208,6 @@ async def load_resources():
     print("🧠 ALLOCATING MEMORY FOR LOCAL ENGLISH MODEL")
     print("=" * 60)
     from transformers import VisionEncoderDecoderModel, TrOCRProcessor
-    import time
     eng_model_path = "trocr-large-english"
     if os.path.exists(eng_model_path):
@@ -371,6 +371,7 @@ def beam_search_decode(model, images, k=3, max_len=25):
     beams = [(torch.full((1, 1), BOS_VAL, dtype=torch.long, device=device), 0.0, [])]
     for step_idx in range(max_len):
         candidates = []
         for seq, score, history in beams:
             # Skip beams that reached EOS
@@ -408,6 +409,17 @@ def beam_search_decode(model, images, k=3, max_len=25):
         # Sort by cumulative score and prune to keep top K beams
         beams = sorted(candidates, key=lambda x: x[1], reverse=True)[:k]
         # Stop if all surviving beams have reached EOS
         if all(b[0][0, -1].item() == EOS_VAL for b in beams):
             break
@@ -533,12 +545,15 @@ async def predict_ocr(file: UploadFile = File(...), lang: str = "hindi"):
             pixel_values, debug_b64 = preprocess_english(image_bytes)
             # Local Inference
             with torch.no_grad():
                 generated_ids = model_eng.generate(pixel_values)
                 prediction = processor_eng.batch_decode(generated_ids, skip_special_tokens=True)[0]
             final_prediction = prediction
-            print(f"ROUTING TO '{final_lang}': Local Inference -> FINAL: '{final_prediction}'")
         else:
             if model is None: return {"error": "Hindi model not loaded"}

 import contextlib
 import requests
 import unicodedata
+import time
 from huggingface_hub import login
 app = FastAPI()
     print("🧠 ALLOCATING MEMORY FOR LOCAL ENGLISH MODEL")
     print("=" * 60)
     from transformers import VisionEncoderDecoderModel, TrOCRProcessor
     eng_model_path = "trocr-large-english"
     if os.path.exists(eng_model_path):
     beams = [(torch.full((1, 1), BOS_VAL, dtype=torch.long, device=device), 0.0, [])]
     for step_idx in range(max_len):
+        step_start_time = time.time()
         candidates = []
         for seq, score, history in beams:
             # Skip beams that reached EOS
         # Sort by cumulative score and prune to keep top K beams
         beams = sorted(candidates, key=lambda x: x[1], reverse=True)[:k]
+        # Calculate step duration in seconds
+        step_duration_sec = time.time() - step_start_time
+        # Update history with duration for each beam
+        new_beams = []
+        for seq, score, history in beams:
+            if history:
+                history[-1]["duration_sec"] = round(step_duration_sec, 4)
+            new_beams.append((seq, score, history))
+        beams = new_beams
         # Stop if all surviving beams have reached EOS
         if all(b[0][0, -1].item() == EOS_VAL for b in beams):
             break
             pixel_values, debug_b64 = preprocess_english(image_bytes)
             # Local Inference
+            start_eng = time.time()
             with torch.no_grad():
                 generated_ids = model_eng.generate(pixel_values)
                 prediction = processor_eng.batch_decode(generated_ids, skip_special_tokens=True)[0]
+            eng_duration_sec = time.time() - start_eng
             final_prediction = prediction
+            inference_steps = [{"word": prediction, "steps": [{"step": "Total", "top_candidates": [{"char": "Full Sequence", "confidence": 1.0}], "duration_sec": round(eng_duration_sec, 3)}]}]
+            print(f"ROUTING TO '{final_lang}': Local Inference -> FINAL: '{final_prediction}' ({eng_duration_sec:.3f}s)")
         else:
             if model is None: return {"error": "Hindi model not loaded"}