georgescutelnicu committed on
Commit 6add590
1 Parent(s): ec64f33

Upload 13 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ examples/0.png filter=lfs diff=lfs merge=lfs -text
add_text.py ADDED
@@ -0,0 +1,54 @@
+ from PIL import Image, ImageDraw, ImageFont
+ import numpy as np
+ import textwrap
+ import cv2
+
+
+ def add_text(image, text, font_path, bubble_contour):
+     """
+     Add text inside a speech bubble contour.
+
+     Args:
+         image (numpy.ndarray): Processed bubble image (cv2 format - BGR).
+         text (str): Text to be placed inside the speech bubble.
+         font_path (str): Font path.
+         bubble_contour (numpy.ndarray): Contour of the detected speech bubble.
+
+     Returns:
+         numpy.ndarray: Image with text placed inside the speech bubble.
+     """
+     pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+     draw = ImageDraw.Draw(pil_image)
+
+     x, y, w, h = cv2.boundingRect(bubble_contour)
+
+     wrapped_text = textwrap.fill(text, width=int(w * 0.1), break_long_words=True)
+
+     line_height = 12
+     font_size = 10
+     font = ImageFont.truetype(font_path, size=font_size)
+
+     lines = wrapped_text.split('\n')
+     total_text_height = len(lines) * line_height
+
+     if total_text_height > h:
+         font = ImageFont.truetype(font_path, size=max(1, int(font_size * h / total_text_height)))  # shrink font to fit
+         line_height = 10
+         total_text_height = len(lines) * line_height
+
+     # Vertical centering
+     text_y = y + (h - total_text_height) // 2
+
+     for line in lines:
+         text_length = draw.textlength(line, font=font)
+
+         # Horizontal centering
+         text_x = x + (w - text_length) // 2
+
+         draw.text((text_x, text_y), line, font=font, fill=(0, 0, 0))
+
+         text_y += line_height
+
+     image[:, :, :] = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+
+     return image
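As a quick sanity check of add_text on its own, here is a minimal sketch (not part of this commit) that draws a synthetic white bubble, recovers its contour with OpenCV, and letters it with one of the fonts added below; the output file name is illustrative only.

# Sketch only: exercise add_text on a synthetic bubble (assumes this repo's fonts/ folder).
import cv2
import numpy as np
from add_text import add_text

canvas = np.zeros((300, 400, 3), dtype=np.uint8)             # blank BGR page
cv2.ellipse(canvas, (200, 150), (150, 100), 0, 0, 360,
            (255, 255, 255), cv2.FILLED)                     # white "speech bubble"

gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
contours, _ = cv2.findContours(gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
bubble = max(contours, key=cv2.contourArea)

add_text(canvas, "Hello, this is a test bubble!", "fonts/animeace_i.ttf", bubble)
cv2.imwrite("bubble_with_text.png", canvas)                  # output name is illustrative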
app.py ADDED
@@ -0,0 +1,65 @@
+ from add_text import add_text
+ from detect_bubbles import detect_bubbles
+ from process_bubble import process_bubble
+ from translator import MangaTranslator
+ from ultralytics import YOLO
+ from manga_ocr import MangaOcr
+ from PIL import Image
+ import gradio as gr
+ import numpy as np
+ import cv2
+
+
+ MODEL = "model.pt"
+ EXAMPLE_LIST = [["examples/0.png"],
+                 ["examples/ex0.png"]]
+ TITLE = "Manga Translator"
+ DESCRIPTION = "Translate text in manga bubbles!"
+
+
+ def predict(img, translation_method="google", font="fonts/animeace_i.ttf"):
+     results = detect_bubbles(MODEL, img)
+
+     manga_translator = MangaTranslator()
+     mocr = MangaOcr()
+
+     image = np.array(img)
+
+     for result in results:
+         x1, y1, x2, y2, score, class_id = result
+
+         detected_image = image[int(y1):int(y2), int(x1):int(x2)]
+
+         im = Image.fromarray(np.uint8((detected_image) * 255))
+         text = mocr(im)
+
+         detected_image, cont = process_bubble(detected_image)
+
+         text_translated = manga_translator.translate(text,
+                                                      method=translation_method)
+
+         image_with_text = add_text(detected_image, text_translated, font, cont)
+
+     return image
+
+ demo = gr.Interface(fn=predict,
+                     inputs=["image",
+                             gr.Dropdown([("Google", "google"),
+                                          ("Helsinki-NLP's opus-mt-ja-en model",
+                                           "hf")],
+                                         label="Translation Method",
+                                         value="google"),
+                             gr.Dropdown([("animeace_i", "fonts/animeace_i.ttf"),
+                                          ("mangati", "fonts/mangati.ttf"),
+                                          ("ariali", "fonts/ariali.ttf")],
+                                         label="Text Font",
+                                         value="fonts/animeace_i.ttf")
+                             ],
+                     outputs=[gr.Image()],
+                     examples=EXAMPLE_LIST,
+                     title=TITLE,
+                     description=DESCRIPTION)
+
+
+ demo.launch(debug=False,
+             share=False)
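predict() wires the pieces together: detect bubbles, OCR each crop with manga-ocr, blank the bubble, translate, and re-letter it in place. Below is a hedged sketch (not part of this commit) of the same per-bubble flow on a local file, without launching the Gradio UI; paths follow this commit's layout and the output name is illustrative. As in app.py, the in-memory array is passed straight to the detector.

# Sketch only: the per-bubble flow from predict(), run on a local image.
import numpy as np
from PIL import Image
from manga_ocr import MangaOcr

from add_text import add_text
from detect_bubbles import detect_bubbles
from process_bubble import process_bubble
from translator import MangaTranslator

image = np.array(Image.open("examples/0.png").convert("RGB"))
mocr = MangaOcr()
translator = MangaTranslator()

for x1, y1, x2, y2, score, class_id in detect_bubbles("model.pt", image):
    crop = image[int(y1):int(y2), int(x1):int(x2)]
    text = mocr(Image.fromarray(crop))                 # Japanese OCR on the bubble crop
    crop, contour = process_bubble(crop)               # whiten the bubble interior
    translated = translator.translate(text, method="google")
    add_text(crop, translated, "fonts/animeace_i.ttf", contour)  # writes back into `image` via the view

Image.fromarray(image).save("translated.png")          # output name is illustrative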
detect_bubbles.py ADDED
@@ -0,0 +1,19 @@
+ from ultralytics import YOLO
+
+
+ def detect_bubbles(model_path, image_path):
+     """
+     Detects bubbles in an image using a YOLOv8 model.
+
+     Args:
+         model_path (str): The file path to the YOLO model.
+         image_path (str): The file path to the input image.
+
+     Returns:
+         list: A list containing the coordinates, score and class_id of
+               the detected bubbles.
+     """
+     model = YOLO(model_path)
+     bubbles = model(image_path)[0]
+
+     return bubbles.boxes.data.tolist()
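Each entry returned by detect_bubbles is the raw Ultralytics row [x1, y1, x2, y2, score, class_id]. A small sketch (not part of this commit) that previews the detections on the bundled example image; the output file name is illustrative.

# Sketch only: draw the detected bubble boxes on the example image.
import cv2
from detect_bubbles import detect_bubbles

image = cv2.imread("examples/0.png")
for x1, y1, x2, y2, score, class_id in detect_bubbles("model.pt", "examples/0.png"):
    cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    cv2.putText(image, f"{score:.2f}", (int(x1), int(y1) - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.imwrite("bubbles_preview.png", image)              # output name is illustrative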
examples/0.png ADDED

Git LFS Details

  • SHA256: cfccaf6c12b806994d153cc083dd595c4f43884a4de54504d1cfac82b4e79de2
  • Pointer size: 132 Bytes
  • Size of remote file: 1.31 MB
examples/ex0.png ADDED
fonts/animeace_i.ttf ADDED
Binary file (28.8 kB).
 
fonts/ariali.ttf ADDED
Binary file (717 kB).
 
fonts/mangati.ttf ADDED
Binary file (30.4 kB).
 
model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f1a64e4e4c0dd30b361eb332866dea0f52eab9acb288b9ffdcb2622cb5d1cdb
+ size 6234585
packages.txt ADDED
@@ -0,0 +1 @@
+ python3-opencv
process_bubble.py ADDED
@@ -0,0 +1,27 @@
+ import cv2
+ import numpy as np
+
+
+ def process_bubble(image):
+     """
+     Processes the speech bubble in the given image, making its contents white.
+
+     Parameters:
+     - image (numpy.ndarray): Input image.
+
+     Returns:
+     - image (numpy.ndarray): Image with the speech bubble content set to white.
+     - largest_contour (numpy.ndarray): Contour of the detected speech bubble.
+     """
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
+
+     contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+     largest_contour = max(contours, key=cv2.contourArea)
+
+     mask = np.zeros_like(gray)
+     cv2.drawContours(mask, [largest_contour], -1, 255, cv2.FILLED)
+
+     image[mask == 255] = (255, 255, 255)
+
+     return image, largest_contour
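A short sketch (not part of this commit) of what process_bubble returns for a single pre-cropped bubble; the input file name is hypothetical.

# Sketch only: whiten a pre-cropped bubble and inspect its contour.
import cv2
from process_bubble import process_bubble

crop = cv2.imread("bubble_crop.png")                   # hypothetical pre-cropped bubble (BGR)
cleared, contour = process_bubble(crop)                # interior set to white, in place
print("bubble bounding box:", cv2.boundingRect(contour))
cv2.imwrite("bubble_cleared.png", cleared)             # output name is illustrative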
requirements ADDED
@@ -0,0 +1,7 @@
+ deep-translator==1.11.4
+ huggingface-hub==0.22.2
+ manga-ocr==0.1.11
+ numpy==1.24.2
+ opencv-python==4.9.0.80
+ pillow==10.3.0
+ ultralytics==8.1.43
translator.py ADDED
@@ -0,0 +1,41 @@
+ from deep_translator import GoogleTranslator
+ from transformers import pipeline
+
+
+ class MangaTranslator:
+     def __init__(self):
+         self.target = "en"
+         self.source = "ja"
+
+     def translate(self, text, method="google"):
+         """
+         Translates the given text to the target language using the specified method.
+
+         Args:
+             text (str): The text to be translated.
+             method (str): "google" for Google Translator,
+                           "hf" for Helsinki-NLP's opus-mt-ja-en model (HF pipeline).
+
+         Returns:
+             str: The translated text.
+         """
+         if method == "hf":
+             return self._translate_with_hf(self._preprocess_text(text))
+         elif method == "google":
+             return self._translate_with_google(self._preprocess_text(text))
+         else:
+             raise ValueError("Invalid translation method.")
+
+     def _translate_with_google(self, text):
+         translator = GoogleTranslator(source=self.source, target=self.target)
+         translated_text = translator.translate(text)
+         return translated_text
+
+     def _translate_with_hf(self, text):
+         pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-ja-en")
+         translated_text = pipe(text)[0]["translation_text"]
+         return translated_text
+
+     def _preprocess_text(self, text):
+         preprocessed_text = text.replace("。", ".")  # Japanese full stop -> ASCII period
+         return preprocessed_text
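For completeness, a minimal sketch (not part of this commit) of MangaTranslator on a single line of OCR output; the "hf" path downloads Helsinki-NLP/opus-mt-ja-en on first use, and the "google" path needs internet access.

# Sketch only: translate one OCR'd line with both supported methods.
from translator import MangaTranslator

translator = MangaTranslator()
print(translator.translate("こんにちは、元気ですか。", method="google"))   # online Google translation
print(translator.translate("こんにちは、元気ですか。", method="hf"))       # local HF pipeline after first download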