Spaces:
Runtime error
Runtime error
| import os, json, sys, tempfile, requests | |
| import cv2 as cv | |
| import numpy as np | |
| from urllib.parse import urlparse | |
| from functools import reduce | |
| from lib.panel import Panel | |
| from lib.debug import Debug | |
class NotAnImageException(Exception):
    """Raised when an input cannot be loaded as a non-empty image."""
| class Kumiko: | |
| options = {} | |
| img = False | |
def __init__(self, options=None):
    """Set up the extractor from an optional dict of settings.

    Recognised keys of ``options``:
      * 'debug'                -- passed to ``Debug(...)``
      * 'progress'             -- stored in ``self.options``; enables progress prints
      * 'rtl'                  -- stored in ``self.options``; when truthy,
                                  ``Panel.set_numbering('rtl')`` is called
      * 'min_panel_size_ratio' -- overrides ``Panel.DEFAULT_MIN_PANEL_SIZE_RATIO``
                                  when present and truthy

    Missing keys fall back to ``False`` (or the Panel default for the ratio).
    """
    options = options or {}

    # Per-instance dict: writing into the class-level `options` attribute would
    # share (and overwrite) settings between every Kumiko instance.
    self.options = {}

    self.dbg = Debug(options.get('debug', False))

    for o in ['progress', 'rtl']:
        self.options[o] = options.get(o, False)

    if self.options['rtl']:
        # NOTE(review): this is a call on the Panel class itself, so it
        # presumably affects numbering beyond this instance -- confirm.
        Panel.set_numbering('rtl')

    self.options['min_panel_size_ratio'] = (
        options.get('min_panel_size_ratio') or Panel.DEFAULT_MIN_PANEL_SIZE_RATIO
    )
def parse_url_list(self, urls):
    """Download each URL into a temporary directory, then parse that directory.

    URLs without a network location or without a path are skipped. Only the
    URLs that were actually downloaded are forwarded to ``parse_dir``, so the
    n-th forwarded URL always corresponds to the n-th file (in sorted filename
    order) of the temporary directory.

    Returns whatever ``self.parse_dir`` returns.
    """
    if self.options['progress']:
        print(len(urls), 'files to download')

    # Zero-padded numbering keeps lexicographic file order equal to URL order.
    nbdigits = len(str(len(urls)))
    downloaded_urls = []

    # The context manager removes the directory once parsing is done.
    with tempfile.TemporaryDirectory() as tempdir:
        for i, url in enumerate(urls):
            filename = 'img' + str(i).zfill(nbdigits)

            if self.options['progress']:
                print('\t', url, ' -> ' + filename)

            parts = urlparse(url)
            if not parts.netloc or not parts.path:
                continue

            r = requests.get(url)
            with open(os.path.join(tempdir, filename), 'wb') as f:
                f.write(r.content)
            downloaded_urls.append(url)

        return self.parse_dir(tempdir, urls=downloaded_urls)
def parse_dir(self, directory, urls=None):
    """Parse every entry of ``directory`` and return ``parse_images``' result.

    ``urls`` is forwarded unchanged to ``parse_images``.
    """
    entry_paths = [entry.path for entry in os.scandir(directory)]
    return self.parse_images(entry_paths, urls)
def parse_images(self, filenames=[], urls=None):
    """Run ``parse_image`` on each file, in sorted filename order.

    When ``urls`` is given (and non-empty), ``urls[i]`` is used as the label of
    the i-th file in sorted order. Files for which ``parse_image`` raises
    ``NotAnImageException`` are reported on stderr and left out of the result.

    Returns the list of per-image results.
    """
    show_progress = self.options['progress']
    if show_progress:
        print(len(filenames),'files to cut panels for')

    results = []
    for i, filename in enumerate(sorted(filenames)):
        url = urls[i] if urls else None

        if show_progress:
            print("\t",url if urls else filename)

        try:
            results.append(self.parse_image(filename,url=url))
        except NotAnImageException:
            # this file is not an image, will not be part of the results
            print("Not an image, will be ignored: {}".format(filename), file=sys.stderr)

    return results
def get_contours(self, gray, filename, bgcol):
    """Threshold ``gray`` for the supposed background colour and return the
    external contours of the thresholded image.

    bgcol 'white': inverse binary threshold at 220 (pixels above 220 become 0,
                   all others 255).
    bgcol 'black': binary threshold at 25 (pixels above 25 become 255, all
                   others 0).

    Raises ``Exception`` for any other ``bgcol``. ``filename`` is unused.
    """
    if bgcol not in ('white', 'black'):
        raise Exception('Fatal error, unknown background color: '+str(bgcol))

    if bgcol == 'white':
        level, mode = 220, cv.THRESH_BINARY_INV
    else:
        level, mode = 25, cv.THRESH_BINARY

    _, thresh = cv.threshold(gray, level, 255, mode)

    # Keep only the last two return values: findContours returns
    # (image, contours, hierarchy) in OpenCV 3.x and (contours, hierarchy) in 4.x.
    contours, _ = cv.findContours(thresh, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)[-2:]

    self.dbg.add_image(thresh,'Thresholded image, supposed {} background'.format(bgcol))
    return contours
def group_small_panels(self, panels, filename):
    """Replace clusters of small panels by one merged panel each.

    For every small panel (``is_small()``), the small panels located after it
    in the list that are ``is_close()`` to the group built so far are merged
    into it with ``Panel.merge``. All small panels that were examined as a
    group are removed from ``panels``; a merged panel is appended only if the
    group has at least two members and the merged result is not itself small.
    A small panel with no close small neighbour is simply removed.

    ``panels`` is modified in place and also returned. ``filename`` is unused.
    """
    i = 0
    panels_to_add = []
    while i < len(panels):
        p1 = panels[i]

        if not p1.is_small():
            i += 1
            continue

        # build up a group of panels that are close to one another
        big_panel = p1
        grouped = [i]

        for j in range(i + 1, len(panels)):
            p2 = panels[j]
            if p2.is_small() and p2.is_close(big_panel):
                grouped.append(j)
                # build up bigger panel for current group
                big_panel = Panel.merge(big_panel, p2)

        # add new grouped panel, if not small
        if len(grouped) > 1 and not big_panel.is_small():
            panels_to_add.append(big_panel)

            tmp_img = self.dbg.draw_panels(self.img, [panels[k] for k in grouped], Debug.colours['lightblue'])
            tmp_img = self.dbg.draw_panels(tmp_img, [big_panel], Debug.colours['green'])
            self.dbg.add_image(tmp_img, 'Group small panels')

        # remove all panels in group (highest index first so indices stay valid)
        for k in reversed(grouped):
            del panels[k]

        # Do NOT advance i: index i was part of the removed group, so it now
        # holds a panel that has not been examined yet. Advancing here would
        # skip that panel.

    panels.extend(panels_to_add)

    self.dbg.add_step('Group small panels', panels)

    return panels
def split_panels(self, panels):
    """Split panels that can be split, then drop small panels, in place.

    Each panel whose ``split()`` returns something other than ``None`` is
    removed from ``panels`` and replaced by the returned sub-panels (appended
    at the end). Afterwards every panel for which ``is_small()`` is true is
    removed.

    ``panels`` is modified in place; nothing is returned.
    """
    new_panels = []
    old_panels = []
    for p in panels:
        new = p.split()
        if new is not None:
            old_panels.append(p)
            new_panels += new

            self.dbg.draw_contours(self.img, [n.polygon for n in new])

    for p in old_panels:
        panels.remove(p)

    panels += new_panels

    self.dbg.add_image(self.img, 'Split contours (shown as non-red contours)')
    self.dbg.add_step('Panels from split contours', panels)

    # Slice assignment so the caller's list is filtered too: rebinding the
    # local name would leave the caller with the small panels still in place,
    # contradicting the debug step recorded just below.
    panels[:] = [p for p in panels if not p.is_small()]

    self.dbg.add_step('Exclude small panels', panels)
def deoverlap_panels(self, panels):
    """Remove overlaps between panels by moving their shared edges, in place.

    Every ordered pair of distinct panels is examined. When
    ``first.overlap_panel(second)`` yields an overlap area:
      * taller than wide, touching ``first``'s right edge: ``first.r`` is moved
        to the overlap's ``x`` and ``second.x`` to the overlap's ``r``;
      * wider than tall, touching ``first``'s bottom edge: ``first.b`` is moved
        to the overlap's ``y`` and ``second.y`` to the overlap's ``b``.
    """
    for first in panels:
        for second in panels:
            if first == second:
                continue

            overlap = first.overlap_panel(second)
            if not overlap:
                continue

            if overlap.w < overlap.h and first.r == overlap.r:
                first.r = overlap.x
                second.x = overlap.r
            elif overlap.w > overlap.h and first.b == overlap.b:
                first.b = overlap.y
                second.y = overlap.b

    self.dbg.add_step('Deoverlap panels', panels)
def merge_panels(self, panels):
    """Merge every two panels where one contains the other, in place.

    The containing panel's slot is replaced by ``Panel.merge`` of the pair and
    the contained panel's index is marked; marked panels are deleted once all
    pairs have been examined.
    """
    doomed = set()
    count = len(panels)

    for i in range(count):
        for j in range(i + 1, count):
            if panels[i].contains(panels[j]):
                doomed.add(j)
                panels[i] = Panel.merge(panels[i], panels[j])
            elif panels[j].contains(panels[i]):
                doomed.add(i)
                panels[j] = Panel.merge(panels[i], panels[j])

    # Delete from the highest index down so remaining indices stay valid.
    for index in sorted(doomed, reverse=True):
        del panels[index]

    self.dbg.add_step('Merge panels', panels)
| # Find out actual gutters between panels | |
| def actual_gutters(panels,func=min): | |
| gutters_x = [] | |
| gutters_y = [] | |
| for p in panels: | |
| left_panel = p.find_left_panel(panels) | |
| if left_panel: gutters_x.append(p.x - left_panel.r) | |
| top_panel = p.find_top_panel(panels) | |
| if top_panel: gutters_y.append(p.y - top_panel.b) | |
| if not gutters_x: gutters_x = [1] | |
| if not gutters_y: gutters_y = [1] | |
| return { | |
| 'x': func(gutters_x), | |
| 'y': func(gutters_y), | |
| 'r': -func(gutters_x), | |
| 'b': -func(gutters_y) | |
| } | |
def expand_panels(self, panels):
    """Expand panels to their neighbour's edge, or page boundaries, in place.

    For each panel and each of its four edges ('x', 'y', 'r', 'b'):
      * with a neighbour on that side, the target is the neighbour's facing
        edge plus the gutter for that direction (from ``actual_gutters``);
      * without one, the target is the outermost value of that edge among all
        panels (the frame around all panels).
    The edge is only ever moved outwards, and a target of -1 is ignored.
    """
    gutters = Kumiko.actual_gutters(panels)
    facing_edge = {'x': 'r', 'r': 'x', 'y': 'b', 'b': 'y'}

    for panel in panels:
        for d in ('x', 'y', 'r', 'b'):  # expand in all four directions
            towards_origin = d in ('x', 'y')

            neighbour = panel.find_neighbour_panel(d, panels)
            if neighbour:
                # expand to that neighbour's edge (minus gutter)
                newcoord = getattr(neighbour, facing_edge[d]) + gutters[d]
            else:
                # expand to the furthest known edge (frame around all panels)
                pick = min if towards_origin else max
                outermost = pick(panels, key=lambda p: getattr(p, d))
                newcoord = getattr(outermost, d)

            if newcoord == -1:
                continue

            current = getattr(panel, d)
            grows = newcoord < current if towards_origin else newcoord > current
            if grows:
                setattr(panel, d, newcoord)

    self.dbg.add_step('Expand panels', panels)
def parse_image(self, filename, url=None):
    """Extract panels from one image.

    ``filename`` is either an already loaded image (numpy array) or something
    ``cv.imread`` can open. A white background is tried first, then a black
    one; the first result with more than one panel is returned, otherwise the
    result of the last attempt.

    Raises ``NotAnImageException`` when no non-empty image array is obtained.

    Returns the dict built by ``parse_image_with_bgcol`` (it includes 'size'
    as ``[width, height]``).
    """
    already_loaded = isinstance(filename, np.ndarray)
    self.img = filename if already_loaded else cv.imread(filename)

    if not isinstance(self.img, np.ndarray) or self.img.size == 0:
        raise NotAnImageException('File {} is not an image'.format(filename))

    self.dbg.add_step('Initial state', [])
    self.dbg.add_image(self.img,'Input image')

    # shape starts with (height, width); results expose [width, height]
    height, width = self.img.shape[:2]
    size = [width, height]

    # NOTE: the 'filename' entry and the '<filename>.license' JSON lookup that
    # used to be added to these infos are currently disabled.
    infos = {'size': size}

    # These are set on the Panel class itself, not on an instance.
    Panel.img_size = size
    Panel.small_panel_ratio = self.options['min_panel_size_ratio']

    self.gray = cv.cvtColor(self.img, cv.COLOR_BGR2GRAY)
    self.dbg.add_image(self.gray,'Shades of gray')

    for bgcol in ('white', 'black'):
        res = self.parse_image_with_bgcol(infos.copy(), filename, bgcol, url)
        if len(res['panels']) > 1:
            break

    return res
def parse_image_with_bgcol(self, infos, filename, bgcol, url=None):
    """Run the panel detection pipeline for one supposed background colour.

    Steps, in order: contours -> approximated polygons -> panels, group small
    panels, split panels, merge contained panels, de-overlap, drop small
    panels, measure gutters, expand panels, number them by sorting.

    ``infos`` is completed with 'background', 'gutters' (``[x, y]``) and
    'panels' (the ``to_xywh()`` form of every panel) and returned. When no
    panel is left, a single panel built from ``[0, 0, width, height]`` is used.
    ``url`` is unused.
    """
    contours = self.get_contours(self.gray, filename, bgcol)
    infos['background'] = bgcol
    self.dbg.infos = infos.copy()

    # Get (square) panels out of contours
    self.dbg.contourSize = int(sum(infos['size']) / 2 * 0.004)
    panels = []
    # This appears to be where each contour is approximated by a polygon.
    for contour in contours:
        epsilon = 0.001 * cv.arcLength(contour, True)
        approx = cv.approxPolyDP(contour, epsilon, True)

        self.dbg.draw_contours(self.img, [approx], Debug.colours['red'])

        panels.append(Panel(polygon=approx))

    self.dbg.add_image(self.img, 'Initial contours')
    self.dbg.add_step('Panels from initial contours', panels)

    # Group small panels that are close together, into bigger ones
    panels = self.group_small_panels(panels, filename)

    # See if panels can be cut into several (two non-consecutive points are close)
    self.split_panels(panels)

    # Merge panels that shouldn't have been split (speech bubble diving in a panel)
    self.merge_panels(panels)

    # splitting polygons may result in panels slightly overlapping, de-overlap them
    self.deoverlap_panels(panels)

    # re-filter out small panels
    panels = [p for p in panels if not p.is_small()]
    self.dbg.add_step('Exclude small panels', panels)

    # get actual gutters before expanding panels
    gutters = Kumiko.actual_gutters(panels)
    infos['gutters'] = [gutters['x'], gutters['y']]

    panels.sort()  # TODO: remove when panels expansion is smarter
    self.expand_panels(panels)

    if not panels:
        width, height = infos['size'][0], infos['size'][1]
        panels.append(Panel([0, 0, width, height]))

    # Number panels comics-wise (ltr/rtl)
    panels.sort()

    # Simplify panels back to lists (x,y,w,h)
    infos['panels'] = [p.to_xywh() for p in panels]
    return infos
def draw_rect(self, image, info):
    """Draw a green, 3-pixel-thick rectangle on ``image`` for every panel.

    ``info['panels']`` holds the panels as (x, y, w, h) sequences. ``image`` is
    passed to ``cv.rectangle`` directly and is also the return value.
    """
    green = (0, 255, 0)
    for panel in info['panels']:
        x, y, w, h = panel[0], panel[1], panel[2], panel[3]
        cv.rectangle(image, (x, y), (x + w, y + h), green, 3)
    return image
def vconcat_resize_min(self, im_list, interpolation=cv.INTER_CUBIC):
    """Stack images vertically after resizing each to the narrowest width.

    Every image is resized to the smallest width found in ``im_list``, with
    its height scaled by the same factor (truncated to an int).
    """
    target_width = min(im.shape[1] for im in im_list)

    resized = []
    for im in im_list:
        target_height = int(im.shape[0] * target_width / im.shape[1])
        resized.append(cv.resize(im, (target_width, target_height), interpolation=interpolation))

    return cv.vconcat(resized)
def vconcat_resize_max(self, im_list, interpolation=cv.INTER_CUBIC):
    """Stack images vertically after resizing each to the widest width.

    Every image is resized to the largest width found in ``im_list``, with its
    height scaled by the same factor (truncated to an int).
    """
    target_width = max(im.shape[1] for im in im_list)

    resized = []
    for im in im_list:
        target_height = int(im.shape[0] * target_width / im.shape[1])
        resized.append(cv.resize(im, (target_width, target_height), interpolation=interpolation))

    return cv.vconcat(resized)
def cut_rect(self, image, info, margin_height):
    """Cut every panel out of ``image`` and stack the cut-outs vertically.

    ``info['panels']`` holds the panels as (x, y, w, h) sequences. From the
    second panel on, a white strip ``margin_height`` pixels high (3 channels,
    uint8) is inserted before the cut-out; stacking goes through
    ``vconcat_resize_max``.

    Returns the stacked image, or ``None`` when there are no panels.
    """
    stacked = None
    for panel in info['panels']:
        x, y, w, h = panel[0], panel[1], panel[2], panel[3]

        # white separator, as wide as the current panel
        margin = np.full([margin_height, w, 3], 255, dtype=np.uint8)
        cutout = image[y: y + h, x: x + w]

        if stacked is None:
            stacked = cutout
        else:
            stacked = self.vconcat_resize_max([stacked, margin, cutout])

    return stacked