ragavsachdeva committed on
Commit
879a744
1 Parent(s): b36f7c2

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +103 -0
README.md ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ ---
5
+ <style>
6
+ .title-container {
7
+ display: flex;
8
+ flex-direction: column; /* Stack elements vertically */
9
+ justify-content: center;
10
+ align-items: center;
11
+ }
12
+
13
+ .title {
14
+ font-size: 2.5em;
15
+ text-align: center;
16
+ color: #333;
17
+ font-family: 'Comic Sans MS', cursive; /* Use Comic Sans MS font */
18
+ text-transform: uppercase;
19
+ letter-spacing: 0.1em;
20
+ padding: 0.5em 0 0.2em;
21
+ background: transparent;
22
+ }
23
+
24
+ .title span {
25
+ background: -webkit-linear-gradient(45deg, #6495ED, #4169E1); /* Blue gradient */
26
+ -webkit-background-clip: text;
27
+ -webkit-text-fill-color: transparent;
28
+ }
29
+
30
+ .subheading {
31
+ font-size: 1.5em; /* Adjust the size as needed */
32
+ text-align: center;
33
+ color: #555; /* Adjust the color as needed */
34
+ font-family: 'Comic Sans MS', cursive; /* Use Comic Sans MS font */
35
+ }
36
+
37
+ .authors {
38
+ font-size: 1em; /* Adjust the size as needed */
39
+ text-align: center;
40
+ color: #777; /* Adjust the color as needed */
41
+ font-family: 'Comic Sans MS', cursive; /* Use Comic Sans MS font */
42
+ padding-top: 1em;
43
+ }
44
+
45
+ .affil {
46
+ font-size: 1em; /* Adjust the size as needed */
47
+ text-align: center;
48
+ color: #777; /* Adjust the color as needed */
49
+ font-family: 'Comic Sans MS', cursive; /* Use Comic Sans MS font */
50
+ }
51
+
52
+ </style>
53
+
54
+ <div class="title-container">
55
+ <div class="title">
56
+ The Manga <span>Whisperer</span>
57
+ </div>
58
+ <div class="subheading">
59
+ Automatically Generating Transcriptions for Comics
60
+ </div>
61
+ <div class="authors">
62
+ Ragav Sachdeva and Andrew Zisserman
63
+ </div>
64
+ <div class="affil">
65
+ University of Oxford
66
+ </div>
67
+ </div>
68
+
69
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/630852d2f0dc38fb47c347a4/B3ngZKXGZGBcZgPK6_XF0.png)
70
+
71
+ # Usage
72
+ ```python
73
+ from transformers import AutoModel
74
+ import numpy as np
75
+ from PIL import Image
76
+ import torch
77
+ import os
78
+
79
+ images = [
80
+ "path_to_image1.jpg",
81
+ "path_to_image2.png",
82
+ ]
83
+
84
+ def read_image_as_np_array(image_path):
85
+     with open(image_path, "rb") as file:
86
+         image = Image.open(file).convert("L").convert("RGB")
87
+         image = np.array(image)
88
+     return image
89
+
90
+ images = [read_image_as_np_array(image) for image in images]
91
+
92
+ model = AutoModel.from_pretrained("ragavsachdeva/magi", trust_remote_code=True).cuda()
93
+ with torch.no_grad():
94
+     results = model.predict_detections_and_associations(images)
95
+     text_bboxes_for_all_images = [x["texts"] for x in results]
96
+     ocr_results = model.predict_ocr(images, text_bboxes_for_all_images)
97
+
98
+ for i in range(len(images)):
99
+     model.visualise_single_image_prediction(images[i], results[i], filename=f"image_{i}.png")
100
+     model.generate_transcript_for_single_image(results[i], ocr_results[i], filename=f"transcript_{i}.txt")
101
+ ```
102
+
103
+