Spaces:

vithacocf
/

ocr

Running on Zero

App Files Community

vithacocf committed on Jul 10

Commit

25db7d4

verified ·

1 Parent(s): 0fb18ff

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -45

app.py CHANGED Viewed

@@ -49,70 +49,60 @@
 # Code fix
 import gradio as gr
-# from transformers import AutoProcessor, AutoModelForVision2Seq
 from PIL import Image, UnidentifiedImageError
-# import torch
 import os
-# # Cấu hình thiết bị
-# device = "cuda" if torch.cuda.is_available() else "cpu"
-# torch.cuda.empty_cache()
-# # Load mô hình
-# model_id = "prithivMLmods/Camel-Doc-OCR-062825"
-# processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-# model = AutoModelForVision2Seq.from_pretrained(
-#     model_id,
-#     torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-#     trust_remote_code=True
-# ).to(device)
-# Hỗ trợ định dạng ảnh
-def is_supported_image(image):
-    return isinstance(image, Image.Image)
-# Chuyển PNG sang JPG
 def convert_png_to_jpg(image):
-    converted = Image.new("RGB", image.size, (255, 255, 255))
-    converted.paste(image)
-    return converted
 # Hàm chính
-def predict(image_path, prompt=None):
-    if not isinstance(image_path, str) or not os.path.exists(image_path):
-        return "=Không tìm thấy ảnh. Vui lòng thử lại sau khi upload thành công."
     if prompt is None or prompt.strip() == "":
         return "=Vui lòng nhập prompt để trích xuất dữ liệu."
     try:
-        image = Image.open(image_path).convert("RGB")
-        if image.mode in ["RGBA", "LA"]:
-            new_img = Image.new("RGB", image.size, (255, 255, 255))
-            new_img.paste(image)
-            image = new_img
     except UnidentifiedImageError:
         return "=Không thể đọc ảnh. Ảnh có thể bị hỏng hoặc sai định dạng."
     except Exception as e:
         return f"=Lỗi khi xử lý ảnh: {str(e)}"
-    # inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
-    # generated_ids = model.generate(
-    #     **inputs,
-    #     max_new_tokens=512,
-    #     do_sample=False,
-    #     use_cache=False,
-    #     eos_token_id=processor.tokenizer.eos_token_id,
-    #     pad_token_id=processor.tokenizer.pad_token_id
-    # )
-    # result = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    result = "aaa"
-    return result
 demo = gr.Interface(
     fn=predict,
     inputs=[

 # Code fix
 import gradio as gr
+from transformers import AutoProcessor, AutoModelForVision2Seq
 from PIL import Image, UnidentifiedImageError
+import torch
 import os
+# Cấu hình thiết bị
+device = "cuda" if torch.cuda.is_available() else "cpu"
+torch.cuda.empty_cache()
+# Load mô hình
+model_id = "prithivMLmods/Camel-Doc-OCR-062825"
+processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+model = AutoModelForVision2Seq.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+    trust_remote_code=True
+).to(device)
+# Hàm xử lý ảnh (nếu có kênh alpha)
 def convert_png_to_jpg(image):
+    if image.mode in ["RGBA", "LA"]:
+        converted = Image.new("RGB", image.size, (255, 255, 255))
+        converted.paste(image, mask=image.split()[-1])  # Dùng alpha làm mask
+        return converted
+    return image.convert("RGB")
 # Hàm chính
+def predict(image, prompt=None):
+    if image is None:
+        return "=Vui lòng tải lên ảnh hợp lệ."
     if prompt is None or prompt.strip() == "":
         return "=Vui lòng nhập prompt để trích xuất dữ liệu."
     try:
+        image = convert_png_to_jpg(image)
+        inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
+        generated_ids = model.generate(
+            **inputs,
+            max_new_tokens=512,
+            do_sample=False,
+            use_cache=False,
+            eos_token_id=processor.tokenizer.eos_token_id,
+            pad_token_id=processor.tokenizer.pad_token_id
+        )
+        result = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        return result
     except UnidentifiedImageError:
         return "=Không thể đọc ảnh. Ảnh có thể bị hỏng hoặc sai định dạng."
     except Exception as e:
         return f"=Lỗi khi xử lý ảnh: {str(e)}"
 demo = gr.Interface(
     fn=predict,
     inputs=[