Upload config.json with huggingface_hub

2290a07 verified about 1 month ago

5.07 kB

	{
	"version": "v5.1",
	"description": "AI Content Detection meta-classifier weights (stacking ensemble)",
	"modalities": [
	"image",
	"audio",
	"text",
	"video"
	],
	"benchmark": {
	"image": {
	"accuracy": 0.98,
	"tpr": 1.0,
	"tnr": 0.965,
	"f1": 0.978,
	"samples": 204
	},
	"audio": {
	"accuracy": 0.852,
	"tpr": 0.85,
	"tnr": 0.855,
	"f1": 0.866,
	"samples": 142
	},
	"text": {
	"accuracy": 0.93,
	"tpr": 0.917,
	"tnr": 0.947,
	"f1": 0.936,
	"samples": 129
	},
	"video": {
	"accuracy": 0.9,
	"tpr": 0.833,
	"tnr": 1.0,
	"f1": 0.909,
	"samples": 30
	},
	"overall": {
	"accuracy": 0.928,
	"samples": 475
	}
	},
	"image": {
	"n_features": 8,
	"feature_names": [
	"NYUAD_ViT",
	"ai_vs_real_ViT",
	"DFDC_ViT",
	"ai_image_det_ViT",
	"fft_slope",
	"hf_ratio",
	"Bombek1_SigLIP2_DINOv2",
	"SPAI_CVPR2025"
	],
	"scaler_mean": [
	0.46721075337286583,
	0.4332848905084707,
	0.34848470501282125,
	0.7513610315914312,
	-2.7428234702735845,
	1.4757695660114816e-05,
	0.47213903127932083,
	0.5310949190042461
	],
	"scaler_scale": [
	0.4562829992667211,
	0.4653274721438903,
	0.2594560381028844,
	0.2566914952700282,
	0.31761878154208484,
	1.745336794888413e-05,
	0.4468171423032323,
	0.4707389622737817
	],
	"lr_coef": [
	0.6488963010751596,
	0.19470730198227582,
	0.3669096091179738,
	-1.1058065882150858,
	-0.47635552888598026,
	-0.015401252102331365,
	2.5029078795863406,
	1.237011726618108
	],
	"lr_intercept": -0.7403570533419102,
	"hf_models": [
	"Nahrawy/AIorNot",
	"haywoodsloan/ai-image-detector-deploy",
	"prithivMLmods/Deep-Fake-Detector-Model",
	"Organika/sdxl-detector",
	"Bombek/BombDetector-v3-SigLIP2-DINOv2-LoRA"
	],
	"notes": "Bombek1 is #1 feature (coef=+2.50). SPAI uses local repo."
	},
	"audio": {
	"n_features": 9,
	"feature_names": [
	"DavidCombei_wav2vec2",
	"Gustking_wav2vec2",
	"mo_thecreator_deepfake",
	"spectral_flatness",
	"centroid_mean",
	"centroid_std",
	"zero_crossing_rate",
	"spectral_rolloff",
	"DF_Arena_1B"
	],
	"scaler_mean": [
	0.5667607612050348,
	0.2773010993612484,
	0.23310774392822925,
	0.03141037016224877,
	1807.2398348786571,
	897.18004887457,
	0.12301036345108962,
	6620.40736210088,
	0.5433762406366287
	],
	"scaler_scale": [
	0.48680867334512096,
	0.29197482864644153,
	0.4211570130989059,
	0.024618810573647662,
	459.40344999868597,
	394.8528855416117,
	0.046570088698838365,
	829.6553459300637,
	0.4155082795685684
	],
	"lr_coef": [
	0.7845433297452213,
	-0.25601227158569434,
	0.38715143588917217,
	0.5305971113288093,
	0.14191280089652655,
	1.7648106776858394,
	-1.6174243839603224,
	-1.09787021389514,
	1.092684667819162
	],
	"lr_intercept": 0.39250921446958165,
	"hf_models": [
	"DavidCombei/wav2vec2-xls-r-1b-deepfake",
	"Gustking/ai_voice_detect_xlsr",
	"mo-thecreator/Deepfake-audio-detection",
	"Aloukik21/DF_Arena_1B"
	]
	},
	"text": {
	"n_features": 8,
	"feature_names": [
	"binoculars_score",
	"roberta_chatgpt_score",
	"perplexity_norm",
	"avg_sentence_length",
	"lexical_diversity",
	"hapax_ratio",
	"avg_word_length",
	"fakespot_roberta_score"
	],
	"scaler_mean": [
	1.1353826005329457,
	0.33250804246780497,
	-0.48164806951384675,
	5.916446148470062,
	0.6490103211442594,
	0.5124573713819743,
	5.220866125485708,
	0.6364287314816944
	],
	"scaler_scale": [
	0.19535976595611237,
	0.45007809250809544,
	0.21119484430166974,
	1.1937958293169302,
	0.19352867829552858,
	0.21389850106439456,
	1.2135677101079925,
	0.43094435530407293
	],
	"lr_coef": [
	-0.6243579398646565,
	0.389259232075374,
	-0.5040499517552531,
	-0.21291399657541557,
	-0.08360375807827485,
	-0.014109874794709326,
	0.22446151217916235,
	1.2266905154327146
	],
	"lr_intercept": 0.1964292008569683,
	"hf_models": [
	"tiiuae/falcon-7b",
	"tiiuae/falcon-7b-instruct",
	"roberta-base-openai-detector",
	"fakespot-ai/roberta-base-fakespot-detector"
	],
	"notes": "Binoculars uses falcon-7b pair. fakespot is #1 feature (coef=+1.23)."
	},
	"video": {
	"method": "frame_sampling + audio_extraction",
	"frame_detector": "image meta-classifier on sampled frames",
	"audio_detector": "audio meta-classifier on extracted audio (ffmpeg)",
	"combining": "equal-weight (50/50) combination of video_prob (frame-detector score) and audio_prob",
	"default_num_frames": 8,
	"notes": "Video = image detection on frames + audio detection on extracted track"
	}
	}