ChineseSafe-Benchmark / data /subclass_gen.csv
JaydenCool's picture
update files
d7041cd
Model,Size,Discrimination_Accuracy,Discrimination_Precision,Discrimination_Recall,Variant_Accuracy,Variant_Precision,Variant_Recall,Psychology_Accuracy,Psychology_Precision,Psychology_Recall,Politics_Accuracy,Politics_Precision,Politics_Recall,Eroticism_Accuracy,Eroticism_Precision,Eroticism_Recall,Vulgarity_Accuracy,Vulgarity_Precision,Vulgarity_Recall,Property_Accuracy,Property_Precision,Property_Recall,Injury_Accuracy,Injury_Precision,Injury_Recall,Criminality_Accuracy,Criminality_Precision,Criminality_Recall,Ethics_Accuracy,Ethics_Precision,Ethics_Recall
DeepSeek-LLM-67B-Chat,>65B,0.7897,0.7454,0.8652,0.8482,0.7832,0.9726,0.6603,0.6751,0.6011,0.8344,0.7978,0.932,0.8367,0.78,0.9497,0.8449,0.769,0.9767,0.7985,0.7493,0.8825,0.6171,0.6366,0.5125,0.8258,0.7583,0.9401,0.7387,0.7276,0.7596
Qwen1.5-72B-Chat,>65B,0.5998,0.693,0.3298,0.8005,0.8477,0.7444,0.4697,0.3314,0.0703,0.6671,0.812,0.506,0.7676,0.8369,0.6803,0.7069,0.7895,0.5476,0.5825,0.6666,0.2918,0.4697,0.3186,0.0668,0.7076,0.7867,0.546,0.5283,0.5803,0.1942
Opt-66B,>65B,0.4866,0.482,0.682,0.5174,0.5203,0.7258,0.5579,0.5338,0.8237,0.5646,0.5728,0.7868,0.5385,0.535,0.7659,0.5571,0.5309,0.8257,0.5414,0.5199,0.7954,0.5354,0.5181,0.7801,0.5376,0.515,0.7909,0.5079,0.5041,0.7185
Llama3-ChatQA-1.5-70B,>65B,0.6682,0.6617,0.6566,0.6859,0.6932,0.6922,0.6079,0.6187,0.5348,0.6548,0.7024,0.6342,0.6861,0.6945,0.6928,0.7029,0.6853,0.7281,0.6211,0.6242,0.5599,0.6105,0.6189,0.5397,0.7134,0.6873,0.7493,0.59,0.6072,0.4996
Yi-1.5-34B-Chat,~30B,0.66,0.6114,0.8339,0.7311,0.6644,0.9577,0.3309,0.2379,0.1626,0.6958,0.6708,0.8646,0.7046,0.6528,0.9053,0.7084,0.6383,0.9309,0.5928,0.5672,0.6961,0.4467,0.4308,0.3972,0.6956,0.6281,0.9097,0.5182,0.515,0.5425
Opt-30B,~30B,0.4672,0.4683,0.6648,0.5002,0.5082,0.7109,0.5044,0.4987,0.7354,0.5314,0.5517,0.7422,0.5108,0.5163,0.7304,0.5161,0.5039,0.7618,0.513,0.5009,0.7578,0.4956,0.4908,0.719,0.5119,0.4977,0.7583,0.4958,0.4955,0.7134
Baichuan2-13B-Chat,10B~20B,0.6337,0.6402,0.5755,0.7188,0.7164,0.7457,0.5185,0.5189,0.3417,0.7341,0.7487,0.7703,0.7033,0.7091,0.7143,0.6742,0.6712,0.6575,0.5657,0.5728,0.434,0.6151,0.6264,0.5371,0.6515,0.65,0.6089,0.5532,0.5707,0.414
Qwen1.5-14B,10B~20B,0.7099,0.6657,0.8141,0.7897,0.7205,0.9615,0.5669,0.5657,0.5226,0.7776,0.7373,0.9181,0.7571,0.7073,0.897,0.7862,0.7044,0.97,0.6421,0.6225,0.6757,0.5014,0.4893,0.3888,0.7563,0.6869,0.9116,0.5499,0.5538,0.4889
Ziya2-13B-Chat,10B~20B,0.5403,0.5272,0.5731,0.6597,0.6313,0.8034,0.3259,0.2145,0.1373,0.673,0.6631,0.8101,0.6526,0.6282,0.7886,0.5583,0.5437,0.6097,0.3987,0.3541,0.2823,0.529,0.5194,0.5497,0.5377,0.5208,0.5678,0.4567,0.4484,0.4035
InternLM2-Chat-20B,10B~20B,0.6819,0.7156,0.5781,0.7661,0.7819,0.7518,0.5506,0.5823,0.3134,0.8061,0.8182,0.8271,0.807,0.7993,0.832,0.8128,0.7876,0.8453,0.7037,0.7305,0.6224,0.6092,0.6548,0.4308,0.7815,0.7702,0.7821,0.5613,0.6058,0.3396
Opt-13B,10B~20B,0.4746,0.4724,0.637,0.5147,0.519,0.7014,0.5146,0.5059,0.7153,0.5333,0.5557,0.7126,0.5261,0.5278,0.7228,0.5187,0.506,0.7257,0.5232,0.5081,0.7367,0.5218,0.5094,0.7314,0.4956,0.4856,0.6828,0.4722,0.4773,0.6264
Gemma-1.1-7B,5B~10B,0.7849,0.7205,0.9139,0.8081,0.7454,0.9485,0.6024,0.6084,0.5413,0.7854,0.758,0.8894,0.8017,0.7436,0.9353,0.8215,0.7367,0.9884,0.6669,0.6543,0.673,0.5811,0.5858,0.4976,0.7831,0.7167,0.9127,0.6684,0.6638,0.6754
Qwen1.5-7B-Chat,5B~10B,0.6885,0.6347,0.8535,0.7677,0.6891,0.9938,0.6929,0.6404,0.8588,0.7791,0.7151,0.9869,0.7653,0.6889,0.988,0.7485,0.6659,0.9746,0.684,0.6317,0.8443,0.7267,0.6564,0.929,0.7473,0.662,0.9772,0.5545,0.5496,0.5778
Yi-1.5-9B-Chat,5B~10B,0.7025,0.6913,0.7058,0.7032,0.7106,0.707,0.4533,0.3925,0.2,0.6546,0.7097,0.6172,0.7209,0.7213,0.7419,0.8197,0.7508,0.9452,0.5595,0.5666,0.4131,0.4342,0.3378,0.1591,0.7626,0.7215,0.8306,0.4057,0.2654,0.1096
DeepSeek-LLM-7B-Chat,5B~10B,0.6455,0.6405,0.6242,0.8131,0.749,0.9539,0.6146,0.6202,0.5617,0.7978,0.7642,0.9083,0.7978,0.7439,0.9236,0.7995,0.7291,0.9387,0.691,0.6715,0.7174,0.6343,0.6345,0.6017,0.7582,0.7064,0.8562,0.6311,0.6381,0.5954
GPT-J-6B,5B~10B,0.5076,0.4966,0.5752,0.5259,0.5322,0.6057,0.548,0.5343,0.6564,0.5565,0.5828,0.6522,0.5454,0.5487,0.6439,0.5365,0.5223,0.6345,0.527,0.513,0.6156,0.5365,0.5235,0.634,0.5386,0.5195,0.6408,0.4891,0.4884,0.5365
Baichuan2-7B,5B~10B,0.619,0.7508,0.3303,0.6409,0.7993,0.3973,0.5355,0.604,0.1652,0.6101,0.8093,0.3705,0.6285,0.7908,0.3739,0.6831,0.8077,0.4616,0.5551,0.6437,0.198,0.5592,0.6601,0.2106,0.683,0.8045,0.4568,0.5144,0.5518,0.1293
GLM-4-9B-Chat,5B~10B,0.7691,0.8562,0.6352,0.7669,0.868,0.6424,0.4801,0.3396,0.0518,0.8123,0.8964,0.7414,0.8671,0.8962,0.8388,0.9278,0.8991,0.9611,0.6401,0.7767,0.3713,0.5054,0.4911,0.1002,0.8728,0.8863,0.8468,0.5132,0.5544,0.1254
InternLM2-Chat-7B,5B~10B,0.53,0.5321,0.3028,0.6981,0.7292,0.6512,0.5182,0.5207,0.2824,0.6362,0.7192,0.5431,0.6717,0.7137,0.6002,0.6512,0.6763,0.551,0.5731,0.5951,0.3902,0.5205,0.5203,0.2849,0.6413,0.6626,0.5289,0.4783,0.4438,0.2061
Opt-6.7B,5B~10B,0.4717,0.4691,0.6091,0.5087,0.5153,0.6691,0.4931,0.4895,0.6491,0.5308,0.5556,0.6899,0.5215,0.5249,0.6922,0.4969,0.4902,0.6595,0.4803,0.4756,0.6266,0.488,0.4842,0.6406,0.4819,0.4741,0.6315,0.4627,0.4684,0.5853
Mistral-7B,5B~10B,0.7069,0.6749,0.7706,0.7521,0.7161,0.8533,0.5826,0.5868,0.5167,0.7142,0.7222,0.7711,0.7599,0.7205,0.8679,0.7956,0.7205,0.9509,0.6748,0.6547,0.7042,0.6139,0.6127,0.5802,0.7742,0.7074,0.9103,0.6388,0.6387,0.6313
Llama3-ChatQA-1.5-8B,5B~10B,0.6114,0.5657,0.8761,0.6276,0.5904,0.885,0.5978,0.5613,0.844,0.6056,0.6016,0.8128,0.6113,0.5825,0.8521,0.6365,0.5805,0.9258,0.6062,0.5625,0.8663,0.6034,0.5629,0.8569,0.6223,0.5694,0.903,0.5658,0.5447,0.7752