Spaces:

raycosine
/

Detangutify

Running

Detangutify / features_preproc.py

raycosine

new augmentation

f158b5d 3 months ago

4.72 kB

	# -*- coding: utf-8 -*-
	from typing import Tuple

	import numpy as np
	from skimage.feature import hog
	from skimage.filters import threshold_otsu
	from skimage.measure import label, moments_central, moments_hu, moments_normalized, regionprops
	from skimage.morphology import binary_dilation, remove_small_objects, square
	from skimage.transform import resize

	# Side length, in pixels, of the square mask produced for feature extraction:
	# it is the default `out_size` of crop_and_center and is passed explicitly by
	# preprocess_and_features.
	LO = 64

	def binarize_from_gray01(gray01: np.ndarray, thr: float = 0.5) -> np.ndarray:
	    """Threshold a grayscale image into a boolean low-intensity mask.

	    The input is copied to float32. If its maximum exceeds 1 it is treated as
	    being on a 0..255 scale and divided by 255 before thresholding.

	    Args:
	        gray01: Grayscale image; any array-like accepted by ``np.asarray``.
	        thr: Cut-off applied after scaling. Pixels strictly below it are True.

	    Returns:
	        Boolean array with the same shape as the input. A zero-size input
	        yields a zero-size mask instead of raising.
	    """
	    # astype() copies, so the in-place division never touches the caller's data.
	    g = np.asarray(gray01).astype(np.float32)
	    # max() of a zero-size array raises ValueError, hence the size guard.
	    if g.size and g.max() > 1:
	        g /= 255.0
	    return g < thr



	def binarize_otsu(
	    gray: np.ndarray,
	    min_size: int = 12,
	    dilate_k: int = 2,
	    keep: str = "largest",  # "largest" | "multi" | "smart"
	    area_ratio: float = 0.08,  # lowered: slightly more permissive
	    topk: int = 8,  # raised: keep a few more candidates
	    horiz_keep_frac: float = 0.50,  # lowered: long horizontal strokes are kept more easily
	    vert_keep_frac: float = 0.55,  # lowered: long vertical strokes are kept more easily
	    ar_keep: float = 3.2,  # elongated components (long side / short side >= ar_keep) are kept too
	    top_edge_frac: float = 0.15,  # wide components starting within the top H * top_edge_frac rows are kept too
	) -> np.ndarray:
	    """Binarize a grayscale image with Otsu's threshold and filter its components.

	    Steps: scale to float32 (dividing by 255 when the maximum exceeds 1), mark
	    pixels at or below the Otsu threshold as foreground, drop small objects,
	    select connected components according to ``keep``, then optionally dilate.

	    Args:
	        gray: 2-D grayscale image.
	        min_size: Passed to ``remove_small_objects``.
	        dilate_k: Side of the square footprint used for the final dilation;
	            values <= 0 skip the dilation.
	        keep: ``"largest"`` keeps only the component with the most pixels.
	            Any other value (the signature comment lists ``"multi"`` and
	            ``"smart"``; they are not distinguished) keeps every component
	            that satisfies at least one of the rules below, up to ``topk``.
	        area_ratio: Keep a component whose area is at least this fraction of
	            the largest component's area.
	        topk: Maximum number of components kept in multi-component mode.
	        horiz_keep_frac: Keep a component whose bbox width is at least this
	            fraction of the widest bbox.
	        vert_keep_frac: Keep a component whose bbox height is at least this
	            fraction of the tallest bbox.
	        ar_keep: Keep a component whose bbox long/short side ratio is at
	            least this value.
	        top_edge_frac: A component whose bbox starts within the top
	            ``int(H * top_edge_frac)`` rows is kept if it is also at least
	            0.45 times as wide as the widest bbox.

	    Returns:
	        Boolean foreground mask with the same shape as ``gray``.
	    """
	    g = gray.astype(np.float32)
	    if g.max() > 1:
	        g /= 255.0
	    # Foreground is the lower-intensity side of the Otsu threshold.
	    bw = g <= threshold_otsu(g)

	    bw = remove_small_objects(bw.astype(bool), min_size=min_size).astype(bool)
	    lab = label(bw)

	    if lab.max() > 0:
	        areas = np.bincount(lab.ravel())
	        areas[0] = 0  # label 0 is the background, never a candidate
	        if keep == "largest":
	            bw = lab == areas.argmax()
	        else:
	            props = regionprops(lab)
	            img_h = bw.shape[0]
	            max_area = areas.max()
	            max_w = max((p.bbox[3] - p.bbox[1] for p in props), default=0)
	            max_h = max((p.bbox[2] - p.bbox[0] for p in props), default=0)
	            top_limit = int(img_h * top_edge_frac)

	            # NOTE(review): candidates are visited in label order, so once
	            # `topk` is reached later components are dropped regardless of size.
	            keep_labels = []
	            for p in props:
	                k = p.label
	                y0, x0, y1, x1 = p.bbox
	                w = x1 - x0
	                h = y1 - y0
	                aspect = max(w, h) / max(1, min(w, h))  # slenderness of the bbox
	                near_top = y0 <= top_limit

	                cond_area = areas[k] >= max_area * area_ratio
	                cond_long = (max_w > 0 and w >= max_w * horiz_keep_frac) or (
	                    max_h > 0 and h >= max_h * vert_keep_frac
	                )
	                cond_slim = aspect >= ar_keep  # elongated stroke
	                cond_top = near_top and (w >= 0.45 * max_w)  # wide stroke at the top edge

	                if cond_area or cond_long or cond_slim or cond_top:
	                    keep_labels.append(k)
	                    if len(keep_labels) >= topk:
	                        break

	            # Union of the selected components (all False when none qualified).
	            bw = np.isin(lab, keep_labels)

	    if dilate_k > 0:
	        bw = binary_dilation(bw, square(dilate_k))
	    return bw
	def crop_and_center(bw: np.ndarray, out_size: int = LO, margin_frac: float = 0.08) -> np.ndarray:
	    """Crop a mask to its foreground, centre it on a square canvas and rescale.

	    The tight bounding box of the True pixels is padded with zeros to a
	    square (extra rows/columns split between both sides), a margin of
	    ``int(side * margin_frac)`` pixels is added on every side, and the result
	    is resized to ``out_size`` x ``out_size`` and re-thresholded at 0.5.

	    Args:
	        bw: 2-D boolean (or truthy/falsy) mask.
	        out_size: Side length of the returned square mask.
	        margin_frac: Margin as a fraction of the square's side before resizing.

	    Returns:
	        Boolean array of shape ``(out_size, out_size)``; all False when ``bw``
	        has no foreground pixel.
	    """
	    rows, cols = np.nonzero(bw)
	    if rows.size == 0 or cols.size == 0:
	        return np.zeros((out_size, out_size), dtype=bool)

	    top, bottom = rows.min(), rows.max()
	    left, right = cols.min(), cols.max()
	    glyph = bw[top:bottom + 1, left:right + 1].astype(np.float32)

	    height, width = glyph.shape
	    side = max(height, width)
	    border = int(side * margin_frac)

	    def _pads(extent):
	        # (before, after) padding that squares this axis and adds the margin;
	        # when the slack is odd, the extra pixel goes after.
	        slack = side - extent
	        before = slack // 2
	        return before + border, slack - before + border

	    canvas = np.pad(glyph, (_pads(height), _pads(width)), mode='constant')
	    scaled = resize(canvas, (out_size, out_size), order=1, anti_aliasing=True, preserve_range=True)
	    return (scaled > 0.5).astype(bool)

	def proj_features(bw: np.ndarray, m: int = 32) -> np.ndarray:
	    """Return pooled, max-normalized projection profiles of a 2-D mask.

	    The per-column sums (``sum(axis=0)``) and per-row sums (``sum(axis=1)``)
	    are each divided by their maximum (when positive) and average-pooled
	    into ``m`` bins.

	    Args:
	        bw: 2-D mask.
	        m: Number of bins per profile.

	    Returns:
	        float32 vector of length ``2 * m``: column-profile bins followed by
	        row-profile bins.

	    When a profile is shorter than ``m``, every bin is widened to hold at
	    least one sample, so no bin averages an empty slice (which used to give
	    NaN). A zero-length profile pools to zeros. Profiles of length >= ``m``
	    are pooled exactly as before.
	    """
	    col_profile = bw.sum(axis=0).astype(np.float32)
	    row_profile = bw.sum(axis=1).astype(np.float32)
	    for profile in (col_profile, row_profile):
	        # The size check avoids max() raising on a zero-length profile.
	        if profile.size and profile.max() > 0:
	            profile /= profile.max()

	    def pool(v):
	        n = len(v)
	        if n == 0:
	            return np.zeros(m, dtype=np.float32)
	        edges = np.linspace(0, n, m + 1, endpoint=True).astype(int)
	        # Repeated edges (n < m) would produce empty bins; clamp the start
	        # inside the profile and force each bin to span at least one sample.
	        starts = np.minimum(edges[:-1], n - 1)
	        stops = np.maximum(edges[1:], starts + 1)
	        return np.array([v[a:b].mean() for a, b in zip(starts, stops)], dtype=np.float32)

	    return np.concatenate([pool(col_profile), pool(row_profile)], dtype=np.float32)

	def feat_vec(bw: np.ndarray) -> np.ndarray:
	    """Build an L2-normalized float32 descriptor from a binary mask.

	    The vector concatenates, in order: the Hu moment invariants (compressed
	    with a signed ``log1p``), a HOG descriptor (9 orientations, 8x8 cells,
	    2x2 blocks, L2-Hys), and the pooled projections from ``proj_features``.
	    The result is divided by its Euclidean norm plus 1e-8.

	    Fix: ``moments_hu`` expects a matrix of *normalized central moments*, not
	    an image. Passing the mask directly made it read a few top-left pixels as
	    if they were moments. The moments are now derived with
	    ``moments_central`` and ``moments_normalized`` first.
	    NOTE(review): the Hu entries change value as a result; anything fitted on
	    vectors from the previous code should be regenerated.

	    Args:
	        bw: 2-D binary mask.

	    Returns:
	        1-D float32 feature vector (all zeros for a blank mask).
	    """
	    f = bw.astype(np.float32)

	    if f.any():
	        nu = moments_normalized(moments_central(f))
	        hu = moments_hu(nu).astype(np.float32)
	        # Signed log compression keeps the sign while taming the dynamic range.
	        hu = np.sign(hu) * np.log1p(np.abs(hu))
	    else:
	        # A blank mask has zero mass, so its normalized moments are undefined
	        # (division by zero); use zeros so the vector stays finite.
	        hu = np.zeros(7, dtype=np.float32)

	    hogv = hog(
	        f,
	        orientations=9,
	        pixels_per_cell=(8, 8),
	        cells_per_block=(2, 2),
	        block_norm='L2-Hys',
	        feature_vector=True,
	    ).astype(np.float32)

	    proj = proj_features(bw).astype(np.float32)
	    v = np.concatenate([hu, hogv, proj]).astype(np.float32)
	    # The epsilon avoids dividing by zero when every feature is zero.
	    n = float(np.linalg.norm(v) + 1e-8)
	    return v / n

	def preprocess_and_features(gray_or_uint8: np.ndarray) -> np.ndarray:
	    """Run the full pipeline: binarize, crop/centre to LO x LO, extract features.

	    Args:
	        gray_or_uint8: Grayscale image handed to ``binarize_otsu`` with its
	            default settings.

	    Returns:
	        The feature vector produced by ``feat_vec`` for the centred mask.
	    """
	    mask = binarize_otsu(gray_or_uint8)
	    centred = crop_and_center(mask, out_size=LO, margin_frac=0.08)
	    features = feat_vec(centred)
	    return features