kirp committed on
Commit
08dd527
·
verified ·
1 Parent(s): 2e9803a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +70 -3
README.md CHANGED
@@ -1,3 +1,70 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ ```bash
5
+ pip install git+https://github.com/tic-top/transformers.git
6
+ ```
7
+
8
+ ```python
9
+ from transformers import AutoModelForVision2Seq, AutoProcessor
10
+ from PIL import Image
11
+ import torch
12
+ device = "cuda:0"
13
+ repo = "kirp/kosmos2_5"
14
+ dtype = torch.bfloat16
15
+ model = AutoModelForVision2Seq.from_pretrained(repo, device_map = device).to(dtype)
16
+ processor = AutoProcessor.from_pretrained(repo)
17
+
18
+ path = "/home/yilinjia/MambaOCR/kosmos2_5/receipt_00008.png"
19
+ image = Image.open(path)
20
+ prompt = "<ocr>"
21
+ inputs = processor(text=prompt, images=image, return_tensors="pt", max_patches=4096)
22
+
23
+ raw_width, raw_height = image.size
24
+ height, width = inputs.pop("height"), inputs.pop("width")
25
+ scale_height = raw_height / height
26
+ scale_width = raw_width / width
27
+
28
+ inputs = {k: v.to(device) if v is not None else None for k, v in inputs.items()}
29
+ inputs["flattened_patches"] = inputs["flattened_patches"].to(dtype)
30
+ with torch.no_grad():
31
+ generated_text = model.generate(**inputs, max_new_tokens=256)
32
+
33
+ import re, os
34
+ def postprocess(y, scale_height, scale_width, result_path=None):
35
+ y = (
36
+ y.replace("<s>", "")
37
+ .replace("</s>", "")
38
+ .replace("<image>", "")
39
+ .replace("</image>", "")
40
+ .replace(prompt, "")
41
+ )
42
+ pattern = r"<bbox><x_\d+><y_\d+><x_\d+><y_\d+></bbox>"
43
+ bboxs_raw = re.findall(pattern, y)
44
+ lines = re.split(pattern, y)[1:]
45
+ bboxs = [re.findall(r"\d+", i) for i in bboxs_raw]
46
+ bboxs = [[int(j) for j in i] for i in bboxs]
47
+ info = ""
48
+ for i in range(len(lines)):
49
+ box = bboxs[i]
50
+ # do we need to convert the size of the box?
51
+ # maybe yes
52
+ x0, y0, x1, y1 = box
53
+ # maybe modify the order
54
+ if not (x0 >= x1 or y0 >= y1):
55
+ x0 = int(x0 * scale_width)
56
+ y0 = int(y0 * scale_height)
57
+ x1 = int(x1 * scale_width)
58
+ y1 = int(y1 * scale_height)
59
+ info += f"{x0},{y0},{x1},{y0},{x1},{y1},{x0},{y1},{lines[i]}"
60
+
61
+ if result_path is not None:
62
+ os.makedirs(os.path.dirname(result_path), exist_ok=True)
63
+ # create and write in utf-8
64
+ with open(result_path, "w", encoding="utf-8") as f:
65
+ f.write(info)
66
+ else:
67
+ print(info)
68
+
69
+ postprocess(processor.batch_decode(generated_text)[0],scale_height, scale_width)
70
+ ```