ragavsachdeva
committed on
Commit
•
879a744
1
Parent(s):
b36f7c2
Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
---
|
5 |
+
<style>
|
6 |
+
.title-container {
|
7 |
+
display: flex;
|
8 |
+
flex-direction: column; /* Stack elements vertically */
|
9 |
+
justify-content: center;
|
10 |
+
align-items: center;
|
11 |
+
}
|
12 |
+
|
13 |
+
.title {
|
14 |
+
font-size: 2.5em;
|
15 |
+
text-align: center;
|
16 |
+
color: #333;
|
17 |
+
font-family: 'Comic Sans MS', cursive; /* Use Comic Sans MS font */
|
18 |
+
text-transform: uppercase;
|
19 |
+
letter-spacing: 0.1em;
|
20 |
+
padding: 0.5em 0 0.2em;
|
21 |
+
background: transparent;
|
22 |
+
}
|
23 |
+
|
24 |
+
.title span {
|
25 |
+
background: -webkit-linear-gradient(45deg, #6495ED, #4169E1); /* Blue gradient */
|
26 |
+
-webkit-background-clip: text;
|
27 |
+
-webkit-text-fill-color: transparent;
|
28 |
+
}
|
29 |
+
|
30 |
+
.subheading {
|
31 |
+
font-size: 1.5em; /* Adjust the size as needed */
|
32 |
+
text-align: center;
|
33 |
+
color: #555; /* Adjust the color as needed */
|
34 |
+
font-family: 'Comic Sans MS', cursive; /* Use Comic Sans MS font */
|
35 |
+
}
|
36 |
+
|
37 |
+
.authors {
|
38 |
+
font-size: 1em; /* Adjust the size as needed */
|
39 |
+
text-align: center;
|
40 |
+
color: #777; /* Adjust the color as needed */
|
41 |
+
font-family: 'Comic Sans MS', cursive; /* Use Comic Sans MS font */
|
42 |
+
padding-top: 1em;
|
43 |
+
}
|
44 |
+
|
45 |
+
.affil {
|
46 |
+
font-size: 1em; /* Adjust the size as needed */
|
47 |
+
text-align: center;
|
48 |
+
color: #777; /* Adjust the color as needed */
|
49 |
+
font-family: 'Comic Sans MS', cursive; /* Use Comic Sans MS font */
|
50 |
+
}
|
51 |
+
|
52 |
+
</style>
|
53 |
+
|
54 |
+
<div class="title-container">
|
55 |
+
<div class="title">
|
56 |
+
The Manga <span>Whisperer</span>
|
57 |
+
</div>
|
58 |
+
<div class="subheading">
|
59 |
+
Automatically Generating Transcriptions for Comics
|
60 |
+
</div>
|
61 |
+
<div class="authors">
|
62 |
+
Ragav Sachdeva and Andrew Zisserman
|
63 |
+
</div>
|
64 |
+
<div class="affil">
|
65 |
+
University of Oxford
|
66 |
+
</div>
|
67 |
+
</div>
|
68 |
+
|
69 |
+
![image/png](https://cdn-uploads.huggingface.co/production/uploads/630852d2f0dc38fb47c347a4/B3ngZKXGZGBcZgPK6_XF0.png)
|
70 |
+
|
71 |
+
# Usage
|
72 |
+
```python
|
73 |
+
from transformers import AutoModel
|
74 |
+
import numpy as np
|
75 |
+
from PIL import Image
|
76 |
+
import torch
|
77 |
+
import os
|
78 |
+
|
79 |
+
images = [
|
80 |
+
"path_to_image1.jpg",
|
81 |
+
"path_to_image2.png",
|
82 |
+
]
|
83 |
+
|
84 |
+
def read_image_as_np_array(image_path):
|
85 |
+
with open(image_path, "rb") as file:
|
86 |
+
image = Image.open(file).convert("L").convert("RGB")
|
87 |
+
image = np.array(image)
|
88 |
+
return image
|
89 |
+
|
90 |
+
images = [read_image_as_np_array(image) for image in images]
|
91 |
+
|
92 |
+
model = AutoModel.from_pretrained("ragavsachdeva/magi", trust_remote_code=True).cuda()
|
93 |
+
with torch.no_grad():
|
94 |
+
results = model.predict_detections_and_associations(images)
|
95 |
+
text_bboxes_for_all_images = [x["texts"] for x in results]
|
96 |
+
ocr_results = model.predict_ocr(images, text_bboxes_for_all_images)
|
97 |
+
|
98 |
+
for i in range(len(images)):
|
99 |
+
model.visualise_single_image_prediction(images[i], results[i], filename=f"image_{i}.png")
|
100 |
+
model.generate_transcript_for_single_image(results[i], ocr_results[i], filename=f"transcript_{i}.txt")
|
101 |
+
```
|
102 |
+
|
103 |
+
|