Spaces:

akcanca
/

df2

Sleeping

Mustafa Akcanca

Add CFA

5196d55 3 months ago

21.6 kB

	#!/usr/bin/env python3
	"""
	Popescu–Farid CFA Interpolation Forensics (End-to-End)

	Implements the algorithm from:

	A. C. Popescu and H. Farid,
	"Exposing Digital Forgeries in Color Filter Array Interpolated Images,"
	IEEE Trans. Signal Processing, 2005.

	Features:
	- EM estimation of the linear correlation model for each color channel
	(single-channel Gaussian vs uniform mixture).
	- Posterior probability map per channel.
	- Synthetic CFA maps s_r, s_g, s_b for a Bayer pattern.
	- Fourier-domain similarity M(p_c, s_c) per channel.
	- Sliding-window analysis with 50% overlap.
	- Threshold calibration on a set of negative images to get ~0% FPs.
	- Multi-channel fusion (default): window authentic if ANY channel is CFA;
	optional green-only mode: window authentic if GREEN channel is CFA.

	Usage:

	# 1) Calibrate thresholds on negative (non-CFA / tampered) images
	python pf_cfa_detector.py calibrate \
	--neg-dir path/to/negative_images \
	--output thresholds.json

	# 2) Detect on a single image (all channels, PF-style fusion)
	python pf_cfa_detector.py detect \
	--image path/to/test_image.png \
	--thresholds thresholds.json

	# 3) Detect on a directory, green-only mode
	python pf_cfa_detector.py detect \
	--image-dir path/to/test_images \
	--thresholds thresholds.json \
	--green-only
	"""

	import argparse
	import json
	import math
	import os
	from typing import Dict, List, Sequence, Tuple

	import numpy as np
	from numpy.fft import fft2
	from PIL import Image


	# -------------------------------------------------------------------------
	# Parameters and utilities
	# -------------------------------------------------------------------------

	IMG_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")


	def list_image_files(directory: str) -> List[str]:
	"""Return all image files in a directory (non-recursive) with known extensions."""
	paths = []
	for name in os.listdir(directory):
	if name.lower().endswith(IMG_EXTS):
	paths.append(os.path.join(directory, name))
	paths.sort()
	return paths


	def load_rgb_image(path: str) -> np.ndarray:
	"""
	Load an image from disk and return as float64 RGB array in [0,1].

	Parameters
	----------
	path : str
	Path to image file.

	Returns
	-------
	img : np.ndarray, shape (H, W, 3), dtype float64
	RGB image, intensities in [0, 1].
	"""
	im = Image.open(path).convert("RGB")
	arr = np.asarray(im, dtype=np.float64)
	if arr.ndim != 3 or arr.shape[2] != 3:
	raise ValueError(f"Expected an RGB image at {path}")
	return arr / 255.0


	# -------------------------------------------------------------------------
	# EM algorithm for the linear CFA correlation model (single channel)
	# -------------------------------------------------------------------------

	def em_probability_map(
	f: np.ndarray,
	N: int = 1,
	sigma0: float = 0.0075,
	p0: float = 1.0 / 256.0,
	max_iter: int = 50,
	tol: float = 1e-5,
	seed: int = 0,
	) -> Tuple[np.ndarray, np.ndarray]:
	"""
	Estimate the posterior probability map w(x,y) that each sample belongs to
	the linearly correlated model M1 via EM (Gaussian vs uniform mixture).

	Model for a single channel f(x,y):
	f(x,y) = sum_{u,v} alpha_{u,v} f(x+u, y+v) + n(x,y)
	where n(x,y) ~ N(0, sigma^2), alpha_{0,0} = 0.
	"""

	if f.ndim != 2:
	raise ValueError("em_probability_map expects a single 2D channel.")

	f = f.astype(np.float64, copy=False)
	H, W = f.shape
	if H <= 2 * N or W <= 2 * N:
	raise ValueError("Image too small for N = %d neighborhood." % N)

	# For N=1, there are (2N+1)^2 - 1 = 8 neighbors.
	OFFSETS: List[Tuple[int, int]] = []
	for dy in range(-N, N + 1):
	for dx in range(-N, N + 1):
	if dy == 0 and dx == 0:
	continue
	OFFSETS.append((dy, dx))
	K = len(OFFSETS)

	# Build design matrix X and observation vector y.
	num_pixels = (H - 2 * N) * (W - 2 * N)
	X = np.empty((num_pixels, K), dtype=np.float64)
	y = np.empty((num_pixels, 1), dtype=np.float64)

	idx = 0
	for yy in range(N, H - N):
	for xx in range(N, W - N):
	y[idx, 0] = f[yy, xx]
	for k, (dy, dx) in enumerate(OFFSETS):
	X[idx, k] = f[yy + dy, xx + dx]
	idx += 1

	rng = np.random.default_rng(seed)
	alpha = rng.normal(scale=0.01, size=(K, 1))

	# ---- NEW: enforce lower bound on sigma to avoid degeneracy ----
	EPS_SIGMA = 1e-6
	sigma = float(max(sigma0, EPS_SIGMA))

	for _ in range(max_iter):
	# E-step: residuals and posterior weights
	r = y - X @ alpha # (num_pixels,1)

	# Guard sigma here too
	if sigma < EPS_SIGMA:
	sigma = EPS_SIGMA

	coef = 1.0 / (sigma * math.sqrt(2.0 * math.pi))
	P = coef * np.exp(-0.5 * (r / sigma) ** 2) # Gaussian likelihood for M1

	# Posterior w = P / (P + p0)
	w = P / (P + p0)

	# M-step: weighted LS for alpha
	WX = X * w
	A = X.T @ WX
	b = X.T @ (w * y)

	try:
	alpha_new = np.linalg.solve(A, b)
	except np.linalg.LinAlgError:
	alpha_new, *_ = np.linalg.lstsq(A, b, rcond=None)

	# Update sigma^2 = sum w_i r_i^2 / sum w_i
	r = y - X @ alpha_new
	num = float(np.sum(w * (r ** 2)))
	den = float(np.sum(w))

	if den > 0.0:
	sigma_new = math.sqrt(num / den)
	else:
	sigma_new = sigma

	# Clamp again to avoid zero
	if sigma_new < EPS_SIGMA:
	sigma_new = EPS_SIGMA

	# Convergence check
	diff = np.linalg.norm(alpha_new - alpha)
	norm = np.linalg.norm(alpha)
	if norm > 0 and diff < tol * norm:
	alpha = alpha_new
	sigma = sigma_new
	break

	alpha = alpha_new
	sigma = sigma_new

	# Final posterior with converged alpha
	if sigma < EPS_SIGMA:
	sigma = EPS_SIGMA

	r = y - X @ alpha
	coef = 1.0 / (sigma * math.sqrt(2.0 * math.pi))
	P = coef * np.exp(-0.5 * (r / sigma) ** 2)
	w = P / (P + p0)

	prob_map = np.zeros_like(f)
	idx = 0
	for yy in range(N, H - N):
	for xx in range(N, W - N):
	prob_map[yy, xx] = w[idx, 0]
	idx += 1

	return prob_map, alpha.ravel()



	# -------------------------------------------------------------------------
	# CFA synthetic maps and Fourier-domain similarity
	# -------------------------------------------------------------------------

	def synthetic_cfa_map(
	shape: Tuple[int, int],
	channel: str,
	pattern: str = "RGGB",
	) -> np.ndarray:
	"""
	Build the synthetic binary map s_c(x,y) for a Bayer pattern:

	s_c(x,y) = 0 if CFA at (x,y) is color c
	= 1 otherwise

	Bayer patterns are specified as a 4-character string:
	'RGGB', 'BGGR', 'GRBG', 'GBRG', etc.

	pattern[0] -> (row%2==0, col%2==0)
	pattern[1] -> (row%2==0, col%2==1)
	pattern[2] -> (row%2==1, col%2==0)
	pattern[3] -> (row%2==1, col%2==1)
	"""
	H, W = shape
	if len(pattern) != 4:
	raise ValueError("Bayer pattern string must have length 4, e.g. 'RGGB'.")

	channel = channel.upper()
	if channel not in ("R", "G", "B"):
	raise ValueError("channel must be one of 'R', 'G', 'B'.")

	tile = np.array(list(pattern), dtype="<U1").reshape(2, 2)
	s = np.ones((H, W), dtype=np.float64)

	for y in range(H):
	for x in range(W):
	cy = y % 2
	cx = x % 2
	if tile[cy, cx] == channel:
	s[y, x] = 0.0

	return s


	def similarity_measure(prob_map: np.ndarray, synthetic_map: np.ndarray) -> float:
	"""
	Phase-insensitive similarity between a probability map and its CFA
	synthetic map:

	M(p,s) = sum \|F(p)\| * \|F(s)\|
	"""
	if prob_map.shape != synthetic_map.shape:
	raise ValueError("prob_map and synthetic_map must have the same shape.")

	Fp = fft2(prob_map)
	Fs = fft2(synthetic_map)
	return float(np.sum(np.abs(Fp) * np.abs(Fs)))


	# -------------------------------------------------------------------------
	# Sliding-window analysis
	# -------------------------------------------------------------------------

	def sliding_window_indices(H: int, W: int, window: int) -> List[Tuple[int, int]]:
	"""
	Generate (y,x) indices for sliding windows with 50% overlap.
	stride = window // 2 along each axis.
	"""
	if window > H or window > W:
	return [(0, 0)]

	stride = max(1, window // 2)
	indices = []
	y = 0
	while y + window <= H:
	x = 0
	while x + window <= W:
	indices.append((y, x))
	x += stride
	y += stride
	return indices


	def analyze_window(
	window: np.ndarray,
	pattern: str = "RGGB",
	em_kwargs: Dict = None,
	) -> Dict[str, Dict[str, np.ndarray]]:
	"""
	Run EM + CFA similarity on a single RGB window.

	Returns per-channel:
	'prob_map', 'synthetic', 'M', 'alpha'
	"""
	if window.ndim != 3 or window.shape[2] != 3:
	raise ValueError("Expected RGB window of shape (H,W,3).")

	if em_kwargs is None:
	em_kwargs = {}

	H, W, _ = window.shape
	channels = {
	"R": window[:, :, 0],
	"G": window[:, :, 1],
	"B": window[:, :, 2],
	}

	result: Dict[str, Dict[str, np.ndarray]] = {}
	for cname, ch in channels.items():
	prob_map, alpha = em_probability_map(ch, **em_kwargs)
	syn = synthetic_cfa_map((H, W), cname, pattern=pattern)
	M = similarity_measure(prob_map, syn)
	result[cname] = {
	"prob_map": prob_map,
	"synthetic": syn,
	"M": np.array(M, dtype=np.float64),
	"alpha": alpha,
	}

	return result


	def analyze_image_windows(
	img: np.ndarray,
	window: int = 256,
	pattern: str = "RGGB",
	em_kwargs: Dict = None,
	) -> List[Dict]:
	"""
	Apply CFA EM analysis to all sliding windows of an image.

	If image is smaller than `window`, the entire image is treated as one window.
	"""
	H, W, C = img.shape
	if C != 3:
	raise ValueError("Expected RGB image with 3 channels.")

	if em_kwargs is None:
	em_kwargs = {}

	if H < window or W < window:
	windows = [(0, 0)]
	w_h, w_w = H, W
	else:
	windows = sliding_window_indices(H, W, window)
	w_h = w_w = window

	results = []
	for (yy, xx) in windows:
	sub = img[yy : yy + w_h, xx : xx + w_w, :]
	res = analyze_window(sub, pattern=pattern, em_kwargs=em_kwargs)
	entry = {"y": yy, "x": xx, "h": sub.shape[0], "w": sub.shape[1]}
	entry.update(res)
	results.append(entry)

	return results


	# -------------------------------------------------------------------------
	# Threshold calibration and classification
	# -------------------------------------------------------------------------

	def calibrate_thresholds(
	negative_image_paths: Sequence[str],
	window: int = 256,
	pattern: str = "RGGB",
	em_kwargs: Dict = None,
	) -> Dict[str, float]:
	"""
	Estimate per-channel thresholds T_R, T_G, T_B to obtain ~0% false positives
	on a negative set (non-CFA / tampered images).

	Threshold per channel is defined as the maximum M value observed for that
	channel over all windows of all negative images.
	"""
	if em_kwargs is None:
	em_kwargs = {}

	Ms = {"R": [], "G": [], "B": []}

	for path in negative_image_paths:
	img = load_rgb_image(path)
	window_results = analyze_image_windows(
	img,
	window=window,
	pattern=pattern,
	em_kwargs=em_kwargs,
	)
	for r in window_results:
	Ms["R"].append(float(r["R"]["M"]))
	Ms["G"].append(float(r["G"]["M"]))
	Ms["B"].append(float(r["B"]["M"]))

	thresholds: Dict[str, float] = {}
	for c in ("R", "G", "B"):
	values = Ms[c]
	if not values:
	raise RuntimeError(f"No M values collected for channel {c}.")
	thresholds[c] = max(values)

	return thresholds


	def classify_windows(
	window_results: List[Dict],
	thresholds: Dict[str, float],
	green_only: bool = False,
	) -> List[Dict]:
	"""
	Add CFA/tampered labels to each window based on per-channel thresholds.

	If green_only=False (default, PF-style):
	channel is CFA-interpolated <=> M_c > T_c
	window authentic <=> any channel is CFA-interpolated

	If green_only=True:
	channel flags are still computed, but
	window authentic <=> GREEN channel is CFA-interpolated.
	"""
	classified = []
	for r in window_results:
	M_R = float(r["R"]["M"])
	M_G = float(r["G"]["M"])
	M_B = float(r["B"]["M"])

	chan_cfa = {
	"R": M_R > thresholds["R"],
	"G": M_G > thresholds["G"],
	"B": M_B > thresholds["B"],
	}

	if green_only:
	authentic = chan_cfa["G"]
	else:
	authentic = chan_cfa["R"] or chan_cfa["G"] or chan_cfa["B"]

	out = dict(r)
	out["channel_cfa"] = chan_cfa
	out["authentic"] = authentic
	classified.append(out)

	return classified


	def classify_image(
	img_path: str,
	thresholds: Dict[str, float],
	window: int = 256,
	pattern: str = "RGGB",
	em_kwargs: Dict = None,
	green_only: bool = False,
	) -> Dict:
	"""
	Run full sliding-window Popescu–Farid-style detector on a single image.

	Returns:
	{
	"image_path": str,
	"windows": [...],
	"image_authentic": bool,
	}
	"""
	img = load_rgb_image(img_path)
	window_results = analyze_image_windows(
	img,
	window=window,
	pattern=pattern,
	em_kwargs=em_kwargs,
	)
	classified = classify_windows(window_results, thresholds, green_only=green_only)

	image_authentic = any(w["authentic"] for w in classified)

	return {
	"image_path": img_path,
	"windows": classified,
	"image_authentic": image_authentic,
	}


	# -------------------------------------------------------------------------
	# CLI plumbing
	# -------------------------------------------------------------------------

	def add_em_args(parser: argparse.ArgumentParser) -> None:
	"""Add EM-related arguments to a subparser."""
	parser.add_argument(
	"--N",
	type=int,
	default=1,
	help="Neighborhood radius N for EM (default: 1).",
	)
	parser.add_argument(
	"--sigma0",
	type=float,
	default=0.0075,
	help="Initial sigma_0 for EM (default: 0.0075).",
	)
	parser.add_argument(
	"--p0",
	type=float,
	default=1.0 / 256.0,
	help=(
	"Outlier likelihood p0. Default 1/256 (PF-style for 8-bit data). "
	"For a uniform on [0,1], consider --p0 1.0."
	),
	)
	parser.add_argument(
	"--max-iter",
	type=int,
	default=50,
	help="Maximum EM iterations (default: 50).",
	)
	parser.add_argument(
	"--tol",
	type=float,
	default=1e-5,
	help="Relative convergence tolerance on alpha (default: 1e-5).",
	)
	parser.add_argument(
	"--seed",
	type=int,
	default=0,
	help="Random seed for EM initialization (default: 0).",
	)


	def em_args_to_kwargs(args: argparse.Namespace) -> Dict:
	"""Convert parsed EM args into kwargs dict for em_probability_map."""
	return dict(
	N=args.N,
	sigma0=args.sigma0,
	p0=args.p0,
	max_iter=args.max_iter,
	tol=args.tol,
	seed=args.seed,
	)


	def main():
	parser = argparse.ArgumentParser(
	description="Popescu–Farid CFA interpolation detector (EM + spectral similarity)."
	)
	subparsers = parser.add_subparsers(dest="command", required=True)

	# Calibrate subcommand
	cal_parser = subparsers.add_parser(
	"calibrate",
	help="Calibrate per-channel thresholds on negative (non-CFA) images.",
	)
	cal_parser.add_argument(
	"--neg-dir",
	required=True,
	help="Directory with negative (non-CFA / tampered) images for calibration.",
	)
	cal_parser.add_argument(
	"--window",
	type=int,
	default=256,
	help="Window size (square) for analysis (default: 256).",
	)
	cal_parser.add_argument(
	"--pattern",
	type=str,
	default="RGGB",
	help="Bayer pattern string (default: RGGB).",
	)
	cal_parser.add_argument(
	"--output",
	type=str,
	default=None,
	help="Path to JSON file to save thresholds (optional).",
	)
	add_em_args(cal_parser)

	# Detect subcommand
	det_parser = subparsers.add_parser(
	"detect",
	help="Run detector on images using pre-calibrated thresholds.",
	)
	det_parser.add_argument(
	"--image",
	type=str,
	default=None,
	help="Path to a single image.",
	)
	det_parser.add_argument(
	"--image-dir",
	type=str,
	default=None,
	help="Directory with images to process.",
	)
	det_parser.add_argument(
	"--thresholds",
	type=str,
	required=True,
	help="Path to JSON file with thresholds from 'calibrate'.",
	)
	det_parser.add_argument(
	"--window",
	type=int,
	default=256,
	help="Window size (square) for analysis (default: 256).",
	)
	det_parser.add_argument(
	"--pattern",
	type=str,
	default="RGGB",
	help="Bayer pattern string (default: RGGB).",
	)
	det_parser.add_argument(
	"--green-only",
	action="store_true",
	help="Use only GREEN channel for window/image decisions.",
	)
	add_em_args(det_parser)

	args = parser.parse_args()

	if args.command == "calibrate":
	neg_files = list_image_files(args.neg_dir)
	if not neg_files:
	raise SystemExit(f"No images found in negative directory: {args.neg_dir}")

	em_kwargs = em_args_to_kwargs(args)
	thresholds = calibrate_thresholds(
	negative_image_paths=neg_files,
	window=args.window,
	pattern=args.pattern,
	em_kwargs=em_kwargs,
	)

	# Print thresholds
	print("# Calibrated thresholds (0%% FPs on provided negatives):")
	for c in ("R", "G", "B"):
	print(f"T_{c} = {thresholds[c]:.6e}")

	# Optionally save to JSON
	if args.output is not None:
	out_obj = {
	"thresholds": thresholds,
	"pattern": args.pattern,
	"window": args.window,
	"em_params": em_kwargs,
	}
	with open(args.output, "w", encoding="utf-8") as f:
	json.dump(out_obj, f, indent=2)
	print(f"# Saved thresholds to {args.output}")

	elif args.command == "detect":
	# Load thresholds JSON
	with open(args.thresholds, "r", encoding="utf-8") as f:
	data = json.load(f)

	if "thresholds" in data:
	thresholds = data["thresholds"]
	else:
	thresholds = data # assume raw

	# Sanity check
	for c in ("R", "G", "B"):
	if c not in thresholds:
	raise SystemExit(f"Thresholds JSON missing channel '{c}'.")

	# Decide images to process
	images: List[str] = []
	if args.image is not None and args.image_dir is not None:
	raise SystemExit("Specify either --image or --image-dir, not both.")
	elif args.image is not None:
	images = [args.image]
	elif args.image_dir is not None:
	images = list_image_files(args.image_dir)
	if not images:
	raise SystemExit(
	f"No images found in directory: {args.image_dir}"
	)
	else:
	raise SystemExit("You must specify either --image or --image-dir.")

	em_kwargs = em_args_to_kwargs(args)
	green_only = bool(args.green_only)

	for img_path in images:
	result = classify_image(
	img_path,
	thresholds=thresholds,
	window=args.window,
	pattern=args.pattern,
	em_kwargs=em_kwargs,
	green_only=green_only,
	)

	label = "AUTHENTIC" if result["image_authentic"] else "TAMPERED"
	mode = "GREEN_ONLY" if green_only else "ALL_CHANNELS"
	print(f"\n# Image: {result['image_path']}")
	print(f"# Overall label: {label} (mode={mode})")
	print("# y x h w M_R M_G M_B CFA_R CFA_G CFA_B WINDOW_LABEL")

	for w in result["windows"]:
	y = w["y"]
	x = w["x"]
	h = w["h"]
	wd = w["w"]
	M_R = float(w["R"]["M"])
	M_G = float(w["G"]["M"])
	M_B = float(w["B"]["M"])
	cfaR = w["channel_cfa"]["R"]
	cfaG = w["channel_cfa"]["G"]
	cfaB = w["channel_cfa"]["B"]
	wlabel = "AUTH" if w["authentic"] else "TAMP"

	print(
	f"{y:5d} {x:5d} {h:4d} {wd:4d} "
	f"{M_R:.6e} {M_G:.6e} {M_B:.6e} "
	f"{int(cfaR)} {int(cfaG)} {int(cfaB)} {wlabel}"
	)


	if __name__ == "__main__":
	main()