Spaces:

VanguardAI
/

Arabic-OCR

Sleeping

App Files Files Community

VanguardAI committed on 23 days ago

Commit

928fb2c

verified ·

1 Parent(s): 98b1d96

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -8

app.py CHANGED Viewed

@@ -20,6 +20,18 @@ import numpy as np
 # Import Arabic text correction module
 from arabic_corrector import get_corrector
 # Constants
 MIN_PIXELS = 3136
 MAX_PIXELS = 11289600
@@ -427,13 +439,18 @@ def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> s
         primary_device = next(model.parameters()).device
         inputs = inputs.to(primary_device)
-        # Generate output
         with torch.no_grad():
             generated_ids = model.generate(
                 **inputs,
                 max_new_tokens=max_new_tokens,
-                do_sample=False,
-                temperature=0.1
             )
         # Decode output
@@ -459,7 +476,6 @@ def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> s
 def _generate_text_and_confidence_for_crop(
     image: Image.Image,
     max_new_tokens: int = 128,
-    temperature: float = 0.1,
 ) -> Tuple[str, float]:
     """Generate text for a cropped region and compute average per-token confidence from model scores.
@@ -500,13 +516,17 @@ def _generate_text_and_confidence_for_crop(
         primary_device = next(model.parameters()).device
         inputs = inputs.to(primary_device)
-        # Generate with scores
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 max_new_tokens=max_new_tokens,
-                do_sample=False,
-                temperature=temperature,
                 output_scores=True,
                 return_dict_in_generate=True,
             )
@@ -1542,4 +1562,4 @@ if __name__ == "__main__":
         share=False,
         debug=True,
         show_error=True
-    )

 # Import Arabic text correction module
 from arabic_corrector import get_corrector
+# ========================================
+# DETERMINISTIC SETTINGS FOR CONSISTENCY
+# ========================================
+# Set seeds for reproducibility - ensures same image always gives same output
+torch.manual_seed(42)
+torch.cuda.manual_seed_all(42)
+np.random.seed(42)
+# Ensure deterministic behavior in PyTorch operations
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False
 # Constants
 MIN_PIXELS = 3136
 MAX_PIXELS = 11289600
         primary_device = next(model.parameters()).device
         inputs = inputs.to(primary_device)
+        # Generate output - DETERMINISTIC MODE
+        # Set seed for complete reproducibility
+        torch.manual_seed(42)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(42)
         with torch.no_grad():
             generated_ids = model.generate(
                 **inputs,
                 max_new_tokens=max_new_tokens,
+                do_sample=False,  # Greedy decoding for deterministic output
+                # temperature/top_p/top_k are intentionally omitted: they only apply when do_sample=True
             )
         # Decode output
 def _generate_text_and_confidence_for_crop(
     image: Image.Image,
     max_new_tokens: int = 128,
 ) -> Tuple[str, float]:
     """Generate text for a cropped region and compute average per-token confidence from model scores.
         primary_device = next(model.parameters()).device
         inputs = inputs.to(primary_device)
+        # Set seed for deterministic output
+        torch.manual_seed(42)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(42)
+        # Generate with scores - DETERMINISTIC MODE
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 max_new_tokens=max_new_tokens,
+                do_sample=False,  # Greedy decoding for deterministic output
                 output_scores=True,
                 return_dict_in_generate=True,
             )
         share=False,
         debug=True,
         show_error=True
+    )