Spaces:

gyrojeff
/

YuzuMarker.FontDetection

Running

gyrojeff committed on Mar 25, 2023

Commit

68dd12a

•

1 Parent(s): 3b25c65

feat: add tanh

Files changed (3) hide show

detector/data.py CHANGED Viewed

@@ -15,9 +15,10 @@ from PIL import Image
 class FontDataset(Dataset):
-    def __init__(self, path: str, config_path: str = "configs/font.yml"):
         self.path = path
         self.fonts = load_font_with_exclusion(config_path)
         self.images = [
             os.path.join(path, f) for f in os.listdir(path) if f.endswith(".jpg")
@@ -50,6 +51,9 @@ class FontDataset(Dataset):
             out[7:10] = out[2:5]
         out[10] = label.line_spacing / label.image_width
         out[11] = label.angle / 180.0 + 0.5
         return out
@@ -87,6 +91,7 @@ class FontDataModule(LightningDataModule):
         train_shuffle: bool = True,
         val_shuffle: bool = False,
         test_shuffle: bool = False,
         **kwargs,
     ):
         super().__init__()
@@ -94,9 +99,9 @@ class FontDataModule(LightningDataModule):
         self.train_shuffle = train_shuffle
         self.val_shuffle = val_shuffle
         self.test_shuffle = test_shuffle
-        self.train_dataset = FontDataset(train_path, config_path)
-        self.val_dataset = FontDataset(val_path, config_path)
-        self.test_dataset = FontDataset(test_path, config_path)
     def get_train_num_iter(self, num_device: int) -> int:
         return math.ceil(

 class FontDataset(Dataset):
+    def __init__(self, path: str, config_path: str = "configs/font.yml", regression_use_tanh: bool=False):
         self.path = path
         self.fonts = load_font_with_exclusion(config_path)
+        self.regression_use_tanh = regression_use_tanh
         self.images = [
             os.path.join(path, f) for f in os.listdir(path) if f.endswith(".jpg")
             out[7:10] = out[2:5]
         out[10] = label.line_spacing / label.image_width
         out[11] = label.angle / 180.0 + 0.5
+        if self.regression_use_tanh:
+            out[2:12] = out[2:12] * 2 - 1
         return out
         train_shuffle: bool = True,
         val_shuffle: bool = False,
         test_shuffle: bool = False,
+        regression_use_tanh: bool = False,
         **kwargs,
     ):
         super().__init__()
         self.train_shuffle = train_shuffle
         self.val_shuffle = val_shuffle
         self.test_shuffle = test_shuffle
+        self.train_dataset = FontDataset(train_path, config_path, regression_use_tanh)
+        self.val_dataset = FontDataset(val_path, config_path, regression_use_tanh)
+        self.test_dataset = FontDataset(test_path, config_path, regression_use_tanh)
     def get_train_num_iter(self, num_device: int) -> int:
         return math.ceil(

detector/model.py CHANGED Viewed

@@ -11,15 +11,19 @@ import pytorch_lightning as ptl
 class ResNet18Regressor(nn.Module):
-    def __init__(self):
         super().__init__()
         self.model = torchvision.models.resnet18(weights=False)
         self.model.fc = nn.Linear(512, config.FONT_COUNT + 12)
     def forward(self, X):
         X = self.model(X)
         # [0, 1]
-        X[..., config.FONT_COUNT + 2 :] = X[..., config.FONT_COUNT + 2 :].sigmoid()
         return X

 class ResNet18Regressor(nn.Module):
+    def __init__(self, regression_use_tanh: bool=False):
         super().__init__()
         self.model = torchvision.models.resnet18(weights=False)
         self.model.fc = nn.Linear(512, config.FONT_COUNT + 12)
+        self.regression_use_tanh = regression_use_tanh
     def forward(self, X):
         X = self.model(X)
         # [0, 1]
+        if not self.regression_use_tanh:
+            X[..., config.FONT_COUNT + 2 :] = X[..., config.FONT_COUNT + 2 :].sigmoid()
+        else:
+            X[..., config.FONT_COUNT + 2 :] = X[..., config.FONT_COUNT + 2 :].tanh()
         return X

train.py CHANGED Viewed

@@ -24,6 +24,8 @@ lambda_font = 4.0
 lambda_direction = 0.5
 lambda_regression = 1.0
 num_warmup_epochs = 1
 num_epochs = 100
@@ -38,6 +40,7 @@ data_module = FontDataModule(
     train_shuffle=True,
     val_shuffle=False,
     test_shuffle=False,
 )
 num_iters = data_module.get_train_num_iter(num_device) * num_epochs
@@ -62,7 +65,7 @@ trainer = ptl.Trainer(
     deterministic=True,
 )
-model = ResNet18Regressor()
 detector = FontDetector(
     model=model,

 lambda_direction = 0.5
 lambda_regression = 1.0
+regression_use_tanh = True
 num_warmup_epochs = 1
 num_epochs = 100
     train_shuffle=True,
     val_shuffle=False,
     test_shuffle=False,
+    regression_use_tanh=regression_use_tanh,
 )
 num_iters = data_module.get_train_num_iter(num_device) * num_epochs
     deterministic=True,
 )
+model = ResNet18Regressor(regression_use_tanh=regression_use_tanh)
 detector = FontDetector(
     model=model,