---
# Upload metadata (was web-scrape residue that broke YAML parsing; kept as comments):
#   JKrishnanandhaa's picture
#   Upload 54 files
#   ff0e79e verified

# Hybrid Document Forgery Detection - Configuration
#
# NOTE(review): the source file had all indentation stripped; the nesting below
# was reconstructed from section comments and key semantics — confirm against
# the config loader's schema (e.g. that chunked_training/mixing_ratios belong
# under `data` and dataset_specific under `preprocessing`).

# System Settings
system:
  device: cuda  # cuda or cpu
  num_workers: 0  # Reduced to avoid multiprocessing errors
  pin_memory: true
  seed: 42

# Data Settings
data:
  image_size: 384
  batch_size: 8  # Reduced for 16GB RAM
  num_classes: 3  # copy_move, splicing, text_substitution

  # Dataset paths
  datasets:
    doctamper:
      path: datasets/DocTamper
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.001  # 0.1%
    rtm:
      path: datasets/RealTextManipulation
      type: folder
      has_pixel_mask: true
      min_region_area: 0.0003  # 0.03%
    casia:
      path: 'datasets/CASIA 1.0 dataset'  # quoted: contains spaces
      type: folder
      has_pixel_mask: false
      min_region_area: 0.001  # 0.1%
      skip_deskew: true
      skip_denoising: true
    receipts:
      path: datasets/findit2
      type: folder
      has_pixel_mask: true
      min_region_area: 0.0005  # 0.05%
    fcd:
      path: datasets/DocTamper/DocTamperV1-FCD
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.00035  # 0.035% (larger forgeries, keep 99%)
    scd:
      path: datasets/DocTamper/DocTamperV1-SCD
      type: lmdb
      has_pixel_mask: true
      min_region_area: 0.00009  # 0.009% (small forgeries, keep 91.5%)

  # Chunked training for DocTamper (RAM constraint)
  chunked_training:
    enabled: true
    dataset: doctamper
    chunks:
      - {start: 0.0, end: 0.25, name: "chunk_1"}
      - {start: 0.25, end: 0.5, name: "chunk_2"}
      - {start: 0.5, end: 0.75, name: "chunk_3"}
      - {start: 0.75, end: 1.0, name: "chunk_4"}

  # Mixed dataset training (TrainingSet + FCD + SCD)
  mixing_ratios:
    doctamper: 0.70  # 70% TrainingSet (maintains baseline)
    scd: 0.20  # 20% SCD (handles small forgeries, 0.88% avg)
    fcd: 0.10  # 10% FCD (adds diversity, 3.55% avg)

# Preprocessing
preprocessing:
  deskew: true
  normalize: true
  noise_threshold: 15.0  # Laplacian variance threshold
  median_filter_size: 3
  gaussian_sigma: 0.8

  # Dataset-aware preprocessing
  dataset_specific:
    casia:
      deskew: false
      denoising: false

# Augmentation (Training only)
augmentation:
  enabled: true
  # Common augmentations
  common:
    - {type: "noise", prob: 0.3}
    - {type: "motion_blur", prob: 0.2}
    - {type: "jpeg_compression", prob: 0.3, quality: [60, 95]}
    - {type: "lighting", prob: 0.3}
    - {type: "perspective", prob: 0.2}
  # Dataset-specific augmentations
  receipts:
    - {type: "stain", prob: 0.2}
    - {type: "fold", prob: 0.15}

# Model Architecture
model:
  # Encoder
  encoder:
    name: mobilenetv3_small_100
    pretrained: true
    features_only: true
  # Decoder
  decoder:
    name: unet_lite
    channels: [16, 24, 40, 48, 96]  # MobileNetV3-Small feature channels
    upsampling: bilinear
    use_depthwise_separable: true
  # Output
  output_channels: 1  # Binary forgery mask

# Loss Function
loss:
  # Dataset-aware loss
  use_dice: true  # Only for datasets with pixel masks
  bce_weight: 1.0
  dice_weight: 1.0

# Training
training:
  epochs: 30  # Per chunk (increased for single-pass training)
  learning_rate: 0.001  # Higher initial LR for faster convergence
  weight_decay: 0.0001  # Slight increase for better regularization
  # Optimizer
  optimizer: adamw
  # Scheduler
  scheduler:
    type: cosine_annealing_warm_restarts
    T_0: 10  # Restart every 10 epochs
    T_mult: 2  # Double restart period each time
    warmup_epochs: 3  # Warmup for first 3 epochs
    min_lr: 0.00001  # End at 1/100th of initial LR
  # Early stopping
  early_stopping:
    enabled: true
    patience: 10  # Increased to allow more exploration
    min_delta: 0.0005  # Accept smaller improvements (0.05%)
    restore_best_weights: true  # Restore best model when stopping
    monitor: val_dice
    mode: max
  # Checkpointing
  checkpoint:
    save_best: true
    save_every: 5  # Save every 5 epochs
    save_last: true  # Also save last checkpoint
    monitor: val_dice

# Mask Refinement
mask_refinement:
  threshold: 0.5
  morphology:
    closing_kernel: 5
    opening_kernel: 3
  # Adaptive thresholds per dataset
  min_region_area:
    rtm: 0.0003
    receipts: 0.0005
    default: 0.001

# Feature Extraction
features:
  # Deep features
  deep:
    enabled: true
    pooling: gap  # Global Average Pooling
  # Statistical & Shape features
  statistical:
    enabled: true
    features:
      - area
      - perimeter
      - aspect_ratio
      - solidity
      - eccentricity
      - entropy
  # Frequency-domain features
  frequency:
    enabled: true
    features:
      - dct_coefficients
      - high_frequency_energy
      - wavelet_energy
  # Noise & ELA features
  noise:
    enabled: true
    features:
      - ela_mean
      - ela_variance
      - noise_residual
  # OCR-consistency features (text documents only)
  ocr:
    enabled: true
    gated: true  # Only for text documents
    features:
      - confidence_deviation
      - spacing_irregularity
      - stroke_width_variation
  # Feature normalization
  normalization:
    method: standard_scaler
    handle_missing: true

# LightGBM Classifier
classifier:
  model: lightgbm
  params:
    objective: multiclass
    num_class: 3
    boosting_type: gbdt
    num_leaves: 31
    learning_rate: 0.05
    n_estimators: 200
    max_depth: 7
    min_child_samples: 20
    subsample: 0.8
    colsample_bytree: 0.8
    reg_alpha: 0.1
    reg_lambda: 0.1
    random_state: 42
  # Confidence threshold
  confidence_threshold: 0.6

# Metrics
metrics:
  # Localization metrics (only for datasets with pixel masks)
  localization:
    - iou
    - dice
    - precision
    - recall
  # Classification metrics
  classification:
    - accuracy
    - f1_score
    - precision
    - recall
    - confusion_matrix
  # Dataset-aware metric computation
  compute_localization:
    doctamper: true
    rtm: true
    casia: false
    receipts: true

# Outputs
outputs:
  base_dir: outputs
  # Subdirectories
  checkpoints: outputs/checkpoints
  logs: outputs/logs
  plots: outputs/plots
  results: outputs/results

# Visualization
visualization:
  save_mask: true
  save_overlay: true
  save_json: true
  overlay_alpha: 0.5
  colormap: jet

# Deployment
deployment:
  export_onnx: true
  onnx_path: outputs/model.onnx
  quantization: false
  opset_version: 14

# Logging
logging:
  level: INFO
  tensorboard: true
  csv: true
  console: true