Detangutify / features_preproc.py
raycosine
new augmentation
f158b5d
# -*- coding: utf-8 -*-
from typing import Tuple
import numpy as np
from skimage.filters import threshold_otsu
from skimage.morphology import remove_small_objects, binary_dilation, square
from skimage.measure import label, moments_hu, regionprops
from skimage.transform import resize
from skimage.feature import hog
# Side length, in pixels, of the square binary image: the default `out_size`
# of crop_and_center and the size passed by preprocess_and_features.
LO = 64
def binarize_from_gray01(gray01: np.ndarray, thr: float = 0.5) -> np.ndarray:
    """Threshold a grayscale image so that dark pixels become foreground.

    The input is copied to float32. If its maximum exceeds 1 it is treated as
    being on a 0..255 scale and divided by 255 before thresholding.

    Args:
        gray01: Grayscale image; the caller's array is never modified.
        thr: Pixels strictly below this value are marked ``True``.

    Returns:
        Boolean array with the same shape as ``gray01``. A zero-size input
        yields a zero-size mask instead of raising.
    """
    g = gray01.astype(np.float32)
    # ``max()`` of a zero-size array raises ValueError, so check size first.
    if g.size and g.max() > 1:
        g /= 255.0
    return g < thr
def binarize_otsu(
    gray: np.ndarray,
    min_size: int = 12,
    dilate_k: int = 2,
    keep: str = "largest",  # "largest" | "multi" | "smart"
    area_ratio: float = 0.08,  # slightly relaxed
    topk: int = 8,  # keep a few more candidates
    horiz_keep_frac: float = 0.50,  # long horizontal strokes are kept more easily
    vert_keep_frac: float = 0.55,  # long vertical strokes are kept more easily
    ar_keep: float = 3.2,  # elongated components (long/short >= ar_keep) are kept
    top_edge_frac: float = 0.15,  # wide strokes near the top (y0 <= H * 0.15) are kept
) -> np.ndarray:
    """Binarize a grayscale image with Otsu's threshold and filter components.

    Pixels at or below the Otsu threshold are foreground. Small objects are
    removed, then either the single largest connected component is kept
    (``keep == "largest"``) or, for any other ``keep`` value, every component
    that passes at least one of the area / length / slimness / top-edge tests
    below, capped at ``topk`` components. The result is optionally dilated.

    Args:
        gray: 2-D grayscale image. If its maximum exceeds 1 it is divided by
            255 before thresholding.
        min_size: Passed to ``remove_small_objects`` as ``min_size``.
        dilate_k: Side of the square dilation footprint; ``<= 0`` disables it.
        keep: ``"largest"`` keeps one component; any other value selects the
            multi-component path.
        area_ratio: Keep components whose area is at least this fraction of
            the largest component's area.
        topk: Maximum number of components kept on the multi-component path.
        horiz_keep_frac: Keep components whose bounding-box width is at least
            this fraction of the widest bounding box.
        vert_keep_frac: Keep components whose bounding-box height is at least
            this fraction of the tallest bounding box.
        ar_keep: Keep components whose bounding-box long/short side ratio is
            at least this value.
        top_edge_frac: A component whose bounding box starts within this
            fraction of the image height from the top is kept when its width
            is at least 0.45 of the widest bounding box.

    Returns:
        Boolean foreground mask with the same shape as ``gray``.
    """
    g = gray.astype(np.float32)
    if g.max() > 1:
        g /= 255.0

    t = threshold_otsu(g)
    bw = g <= t
    bw = remove_small_objects(bw.astype(bool), min_size=min_size).astype(bool)

    lab = label(bw)
    if lab.max() > 0:
        areas = np.bincount(lab.ravel())
        areas[0] = 0  # label 0 is the background, never a candidate
        if keep == "largest":
            bw = lab == areas.argmax()
        else:
            props = regionprops(lab)
            H = bw.shape[0]
            max_area = areas.max()
            max_w = max((p.bbox[3] - p.bbox[1] for p in props), default=0)
            max_h = max((p.bbox[2] - p.bbox[0] for p in props), default=0)

            # Visit components largest-first so that, when more than `topk`
            # of them qualify, the cap drops the smallest ones rather than
            # whichever happen to come last in label (raster-scan) order.
            # The sort is stable, so equal areas keep their label order.
            by_area = sorted(props, key=lambda p: areas[p.label], reverse=True)

            keep_labels = []
            for p in by_area:
                k = p.label
                y0, x0, y1, x1 = p.bbox
                w = x1 - x0
                h = y1 - y0
                aspect = max(w, h) / max(1, min(w, h))  # elongation
                near_top = y0 <= int(H * top_edge_frac)

                cond_area = areas[k] >= max_area * area_ratio
                cond_long = (max_w > 0 and w >= max_w * horiz_keep_frac) or (
                    max_h > 0 and h >= max_h * vert_keep_frac
                )
                cond_slim = aspect >= ar_keep  # thin slanted strokes
                cond_top = near_top and (w >= 0.45 * max_w)  # wide stroke at the top edge

                if cond_area or cond_long or cond_slim or cond_top:
                    keep_labels.append(k)
                    if len(keep_labels) >= topk:
                        break

            mask = np.zeros_like(bw, dtype=bool)
            for k in keep_labels:
                mask |= lab == k
            bw = mask

    if dilate_k > 0:
        bw = binary_dilation(bw, square(dilate_k))
    return bw
def crop_and_center(bw: np.ndarray, out_size: int = LO, margin_frac: float = 0.08) -> np.ndarray:
    """Crop a 2-D mask to its foreground, pad it to a centered square, and resize.

    The tight bounding box of the ``True`` pixels is cut out, padded with
    zeros to a square (plus a margin of ``int(side * margin_frac)`` on every
    side), resized to ``out_size`` x ``out_size`` with bilinear interpolation
    and anti-aliasing, and thresholded at 0.5.

    Args:
        bw: 2-D foreground mask.
        out_size: Side length of the returned square mask.
        margin_frac: Margin on each side as a fraction of the square side.

    Returns:
        Boolean array of shape ``(out_size, out_size)``; all ``False`` when
        ``bw`` has no foreground pixel.
    """
    rows, cols = np.where(bw)
    if rows.size == 0:
        # Nothing to crop: return an empty canvas of the requested size.
        return np.zeros((out_size, out_size), dtype=bool)

    top, bottom = rows.min(), rows.max()
    left, right = cols.min(), cols.max()
    glyph = bw[top:bottom + 1, left:right + 1].astype(np.float32)

    height, width = glyph.shape
    side = max(height, width)
    margin = int(side * margin_frac)

    # Split the slack on the shorter axis between both sides; the odd pixel,
    # if any, goes to the bottom / right.
    slack_y = side - height
    slack_x = side - width
    pad_rows = (slack_y // 2 + margin, slack_y - slack_y // 2 + margin)
    pad_cols = (slack_x // 2 + margin, slack_x - slack_x // 2 + margin)

    canvas = np.pad(glyph, (pad_rows, pad_cols), mode='constant')
    scaled = resize(
        canvas,
        (out_size, out_size),
        order=1,
        anti_aliasing=True,
        preserve_range=True,
    )
    return (scaled > 0.5).astype(bool)
def proj_features(bw: np.ndarray, m: int = 32) -> np.ndarray:
    """Return pooled, peak-normalized column and row projections of a mask.

    The mask is summed along axis 0 (one value per column) and along axis 1
    (one value per row). Each profile is divided by its own maximum when that
    maximum is positive, then average-pooled into ``m`` bins.

    Args:
        bw: 2-D foreground mask.
        m: Number of bins per profile.

    Returns:
        float32 vector of length ``2 * m``: column-profile bins followed by
        row-profile bins. Profiles shorter than ``m`` are up-sampled by
        repeating samples, and zero-length profiles give all-zero bins, so
        the result never contains NaN.
    """
    hp = bw.sum(axis=0).astype(np.float32)
    vp = bw.sum(axis=1).astype(np.float32)
    for profile in (hp, vp):
        # ``max()`` of a zero-size array raises, so treat it as an all-zero profile.
        peak = profile.max() if profile.size else 0
        if peak > 0:
            profile /= peak

    def pool(v):
        if v.size == 0:
            return np.zeros(m, dtype=np.float32)
        edges = np.linspace(0, len(v), m + 1, endpoint=True).astype(int)
        starts = edges[:-1]
        # When len(v) < m, truncation makes neighbouring edges coincide and the
        # slice would be empty (mean -> NaN). Force every bin to cover at least
        # one sample; bins are unchanged whenever len(v) >= m.
        stops = np.maximum(edges[1:], starts + 1)
        return np.array(
            [v[a:b].mean() for a, b in zip(starts, stops)], dtype=np.float32
        )

    return np.concatenate([pool(hp), pool(vp)]).astype(np.float32, copy=False)
def feat_vec(bw: np.ndarray) -> np.ndarray:
    """Build an L2-normalized feature vector from a binary glyph mask.

    The vector concatenates, in order: the seven Hu moments (signed
    ``log1p`` scaled), a HOG descriptor (9 orientations, 8x8 cells, 2x2
    blocks, L2-Hys normalization) and the output of ``proj_features``.

    NOTE: the Hu moments are computed from the normalized central moments of
    the mask, as ``moments_hu`` requires. Earlier code passed the raw image
    to ``moments_hu``, which read a few corner pixels as if they were
    moments. Feature vectors stored before this fix must be regenerated.

    Args:
        bw: 2-D foreground mask.

    Returns:
        float32 feature vector divided by its Euclidean norm (plus 1e-8).
    """
    # Local import keeps this fix self-contained; the module header only
    # imports `moments_hu` from skimage.measure.
    from skimage.measure import moments_central, moments_normalized

    f = bw.astype(np.float32)

    if f.any():
        # float64 avoids precision loss in the third-order moment sums.
        mu = moments_central(bw.astype(np.float64))
        nu = moments_normalized(mu)
        hu = moments_hu(nu).astype(np.float32)
    else:
        # An all-background mask has zero mass; its centroid is undefined and
        # the moments would be NaN, so use an all-zero Hu block instead.
        hu = np.zeros(7, dtype=np.float32)
    # Compress the wide dynamic range of Hu moments while keeping their sign.
    hu = np.sign(hu) * np.log1p(np.abs(hu))

    hogv = hog(
        f,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        feature_vector=True,
    ).astype(np.float32)
    proj = proj_features(bw).astype(np.float32)

    v = np.concatenate([hu, hogv, proj]).astype(np.float32)
    n = float(np.linalg.norm(v) + 1e-8)  # epsilon guards an all-zero vector
    return v / n
def preprocess_and_features(gray_or_uint8: np.ndarray) -> np.ndarray:
    """Run the full pipeline: Otsu binarization, crop/center, feature vector.

    Args:
        gray_or_uint8: Grayscale image passed unchanged to ``binarize_otsu``.

    Returns:
        The vector produced by ``feat_vec`` for the LO x LO centered mask.
    """
    mask = binarize_otsu(gray_or_uint8)
    centered = crop_and_center(mask, out_size=LO, margin_frac=0.08)
    return feat_vec(centered)