{ "time": "241031154353", "results": { "GPT-4o (0513, detail-high)": { "META": { "Method": [ "GPT-4o (0513, detail-high)", "https://openai.com/index/hello-gpt-4o/" ], "Parameters": "", "Language Model": "", "Vision Model": "", "Org": "OpenAI", "Time": "2024/05/31", "Verified": "Yes", "OpenSource": "No", "key": 270, "dir_name": "GPT4o_HIGH" }, "SEEDBench_IMG": { "Overall": 77.1, "Instance Attributes": 79.3, "Instance Identity": 81.0, "Instance Interaction": 80.4, "Instance Location": 72.9, "Instances Counting": 69.5, "Scene Understanding": 80.1, "Spatial Relation": 67.9, "Text Understanding": 72.6, "Visual Reasoning": 83.1, "Overall (official)": "N/A" }, "CCBench": { "Overall": 71.2, "Sketch Reasoning": 91.1, "Historical Figure": 37.1, "Calligraphy Painting": 70.2, "Scenery Building": 89.5, "Food Clothes": 62.6, "Cultural Relic": 67.0, "Traditional Show": 71.2 }, "MMBench_TEST_EN": { "Overall": 83.4, "CP": 87.4, "FP-S": 78.9, "FP-C": 83.8, "AR": 86.5, "LR": 80.3, "RR": 80.6 }, "MMBench_TEST_CN": { "Overall": 82.1, "CP": 87.6, "FP-S": 76.6, "FP-C": 83.4, "AR": 83.7, "LR": 78.0, "RR": 80.1 }, "MMBench_TEST_EN_V11": { "Overall": 83.0, "AR": 90.2, "CP": 81.3, "FP-C": 86.1, "FP-S": 81.4, "LR": 78.8, "RR": 82.2, "Action Recognition": 93.2, "Attribute Comparison": 82.7, "Attribute Recognition": 91.0, "Celebrity Recognition": 62.6, "Function Reasoning": 93.3, "Future Prediction": 82.7, "Identity Reasoning": 98.7, "Image Emotion": 81.1, "Image Quality": 59.7, "Image Scene": 88.2, "Image Style": 83.7, "Image Topic": 97.8, "Nature Relation": 92.4, "Object Localization": 84.8, "Ocr": 98.9, "Physical Property Reasoning": 78.5, "Physical Relation": 61.3, "Social Relation": 89.0, "Spatial Relationship": 78.7, "Structuralized Imagetext Understanding": 76.1 }, "MMBench_TEST_CN_V11": { "Overall": 81.5, "AR": 86.5, "CP": 81.5, "FP-C": 85.0, "FP-S": 79.1, "LR": 77.2, "RR": 79.8, "Action Recognition": 94.0, "Attribute Comparison": 81.3, "Attribute Recognition": 91.0, "Celebrity Recognition": 57.8, "Function Reasoning": 94.4, "Future Prediction": 82.7, "Identity Reasoning": 97.4, "Image Emotion": 85.6, "Image Quality": 58.9, "Image Scene": 88.2, "Image Style": 80.4, "Image Topic": 98.9, "Nature Relation": 94.6, "Object Localization": 82.9, "Ocr": 97.8, "Physical Property Reasoning": 67.1, "Physical Relation": 53.3, "Social Relation": 86.8, "Spatial Relationship": 74.7, "Structuralized Imagetext Understanding": 73.4 }, "MME": { "Overall": 2310.3, "Perception": 1614.2, "Cognition": 696.1, "OCR": 192.5, "Artwork": 145.2, "Celebrity": 67.9, "Code Reasoning": 177.5, "Color": 185.0, "Commonsense Reasoning": 178.6, "Count": 185.0, "Existence": 185.0, "Landmark": 182.0, "Numerical Calculation": 147.5, "Position": 133.3, "Posters": 191.2, "Scene": 147.0, "Text Translation": 192.5 }, "MMVet": { "Rec": 67.8, "Ocr": 76.8, "Know": 58.3, "Gen": 56.9, "Spat": 74.3, "Math": 76.2, "Overall": 69.1, "Overall (official)": "N/A" }, "MMMU_VAL": { "Overall": 69.2, "Art & Design": 72.5, "Business": 73.3, "Science": 64.7, "Health & Medicine": 74.0, "Humanities & Social Science": 80.8, "Tech & Engineering": 57.6 }, "MathVista": { "Overall": 61.3, "SCI": 64.8, "TQA": 70.3, "NUM": 44.4, "ARI": 58.4, "VQA": 47.5, "GEO": 61.5, "ALG": 62.3, "GPS": 60.1, "MWP": 69.9, "LOG": 43.2, "FQA": 60.2, "STA": 68.4 }, "HallusionBench": { "aAcc": 70.2, "fAcc": 49.1, "qAcc": 45.5, "Overall": 55.0 }, "LLaVABench": { "Overall": 102.0, "Conv": 93.6, "Complex": 111.2, "Detail": 93.6, "Overall (official)": "N/A" }, "AI2D": { "Overall": 84.6, "atomStructure": 75.0, "eclipses": 90.3, "faultsEarthquakes": 78.6, "foodChainsWebs": 92.2, "lifeCycles": 83.5, "moonPhaseEquinox": 68.2, "partsOfA": 80.9, "partsOfTheEarth": 82.7, "photosynthesisRespiration": 83.5, "rockCycle": 73.1, "rockStrata": 87.8, "solarSystem": 97.2, "typesOf": 81.0, "volcano": 100.0, "waterCNPCycle": 68.2 }, "ScienceQA_VAL": { "Overall": 89.7, "Adaptations": 97.9, "Adaptations and natural selection": 100.0, "Age of Exploration": 100.0, "Ancient Egypt and Kush": 100.0, "Ancient Mesopotamia": 100.0, "Animals": 100.0, "Astronomy": 100.0, "Atoms and molecules": 100.0, "Basic economic principles": 32.8, "Chemical reactions": 100.0, "Cities": 87.5, "Classification": 98.8, "Classification and scientific names": 100.0, "Climate change": 100.0, "Colonial America": 90.5, "Context clues": 100.0, "Descriptive details": 100.0, "Designing experiments": 100.0, "Domain-specific vocabulary": 60.0, "Early 19th century American history": 100.0, "Early Americas": 50.0, "Earth events": 100.0, "Ecological interactions": 76.0, "Ecosystems": 95.5, "Engineering practices": 100.0, "English colonies in North America": 74.4, "Force and motion": 84.0, "Fossils": 82.4, "Genes to traits": 83.0, "Geography": 98.6, "Government": 100.0, "Independent reading comprehension": 100.0, "Informational texts: level 1": 100.0, "Magnets": 72.2, "Maps": 96.8, "Materials": 96.6, "Medieval Asia": 100.0, "Natural resources and human impacts": 100.0, "Oceania: geography": 59.6, "Oceans and continents": 100.0, "Oceans and continents\t": 100.0, "Particle motion and energy": 92.6, "Persuasive strategies": 100.0, "Physical Geography": 83.7, "Plant reproduction": 90.0, "Plants": 100.0, "Plate tectonics": 100.0, "Read-alone texts": 100.0, "Rocks and minerals": 100.0, "Rome and the Byzantine Empire": 100.0, "Scientific names": 100.0, "Solutions": 65.7, "State capitals": 100.0, "States": 100.0, "States of matter": 97.4, "The American Revolution": 100.0, "The Americas: geography": 83.3, "The Antebellum period": 100.0, "The Civil War and Reconstruction": 100.0, "The Silk Road": 100.0, "Thermal energy": 100.0, "Velocity, acceleration, and forces": 68.6, "Visual elements": 100.0, "Water cycle": 100.0, "Weather and climate": 90.6, "World religions": 100.0 }, "ScienceQA_TEST": { "Overall": 90.7, "Adaptations": 100.0, "Ancient Egypt and Kush": 100.0, "Ancient Mesopotamia": 100.0, "Animals": 100.0, "Astronomy": 100.0, "Atoms and molecules": 100.0, "Basic economic principles": 38.0, "Cells": 100.0, "Chemical reactions": 100.0, "Cities": 91.7, "Classification": 100.0, "Classification and scientific names": 100.0, "Climate change": 100.0, "Colonial America": 81.6, "Context clues": 100.0, "Descriptive details": 100.0, "Designing experiments": 100.0, "Domain-specific vocabulary": 100.0, "Early 19th century American history": 100.0, "Earth events": 100.0, "Ecological interactions": 66.7, "Ecosystems": 90.4, "Engineering practices": 98.2, "English colonies in North America": 92.3, "Force and motion": 100.0, "Fossils": 100.0, "Genes to traits": 76.3, "Geography": 95.2, "Government": 100.0, "Greece": 100.0, "Independent reading comprehension": 100.0, "Informational texts: level 1": 100.0, "Kinetic and potential energy": 100.0, "Magnets": 77.3, "Maps": 97.8, "Materials": 96.5, "Medieval Asia": 100.0, "Oceania: geography": 76.5, "Oceans and continents": 100.0, "Oceans and continents\t": 100.0, "Particle motion and energy": 97.6, "Persuasive strategies": 100.0, "Photosynthesis": 100.0, "Physical Geography": 92.2, "Plant reproduction": 100.0, "Plants": 66.7, "Plate tectonics": 100.0, "Read-alone texts": 100.0, "Rocks and minerals": 100.0, "Scientific names": 100.0, "Solutions": 72.2, "State capitals": 100.0, "States": 94.4, "States of matter": 100.0, "The American Revolution": 100.0, "The Americas: geography": 71.1, "The Antebellum period": 100.0, "The Civil War and Reconstruction": 100.0, "Thermal energy": 95.5, "Topographic maps": 100.0, "Velocity, acceleration, and forces": 67.7, "Visual elements": 100.0, "Water cycle": 100.0, "Weather and climate": 91.4, "World religions": 100.0 }, "OCRBench": { "Text Recognition": 199, "Scene Text-centric VQA": 181, "Doc-oriented VQA": 168, "Key Information Extraction": 170, "Handwritten Mathematical Expression Recognition": 18, "Final Score": 736 }, "MMStar": { "Overall": 63.9, "coarse perception": 73.6, "fine-grained perception": 54.8, "instance reasoning": 66.4, "logical reasoning": 72.0, "math": 66.4, "science & technology": 50.0 }, "RealWorldQA": { "Overall": 75.4 }, "POPE": { "Overall": 85.6, "acc": 86.7, "precision": 93.0, "recall": 79.3 }, "SEEDBench2_Plus": { "Overall": 72.0, "chart": 71.4, "map": 62.0, "web": 85.2 }, "MMT-Bench_VAL": { "Overall": 67.3, "VR": 85.3, "Loc": 68.1, "OCR": 82.5, "Count": 57.2, "HLN": 75.0, "IR": 85.0, "3D": 57.5, "VC": 87.9, "VG": 46.2, "DU": 72.9, "AR": 51.0, "PLP": 43.5, "I2IT": 50.0, "RR": 76.2, "IQT": 15.0, "Emo": 58.3, "VI": 33.9, "MemU": 87.5, "VPU": 84.9, "AND": 57.0, "KD": 57.1, "VCR": 80.0, "IEJ": 40.0, "MIA": 42.5, "CIM": 61.7, "TU": 49.5, "VP": 66.7, "MedU": 74.0, "AUD": 58.0, "DKR": 64.6, "EA": 90.0, "GN": 46.2, "abstract_visual_recognition": 85.0, "action_quality_assessment": 15.0, "age_gender_race_recognition": 60.0, "anatomy_identification": 75.0, "animal_keypoint_detection": 35.0, "animals_recognition": 100.0, "animated_character_recognition": 90.0, "art_design": 81.8, "artwork_emotion_recognition": 55.0, "astronomical_recognition": 100.0, "attribute_hallucination": 80.0, "behavior_anomaly_detection": 30.0, "body_emotion_recognition": 40.0, "building_recognition": 90.0, "business": 66.7, "camouflage_object_detection": 55.0, "celebrity_recognition": 0.0, "chart_to_table": 95.0, "chart_to_text": 90.0, "chart_vqa": 70.0, "chemical_apparatusn_recognition": 80.0, "clock_reading": 30.0, "clothes_keypoint_detection": 70.0, "color_assimilation": 35.0, "color_constancy": 14.3, "color_contrast": 40.0, "color_recognition": 95.0, "counting_by_category": 33.8, "counting_by_reasoning": 95.0, "counting_by_visual_prompting": 50.0, "crowd_counting": 50.0, "deepfake_detection": 60.0, "depth_estimation": 40.0, "disaster_recognition": 85.0, "disease_diagnose": 60.0, "doc_vqa": 80.0, "electronic_object_recognition": 100.0, "eqn2latex": 90.0, "exist_hallucination": 90.0, "facail_expression_change_recognition": 95.0, "face_detection": 90.0, "face_mask_anomaly_dectection": 70.0, "face_retrieval": 100.0, "facial_expression_recognition": 75.0, "fashion_recognition": 75.0, "film_and_television_recognition": 95.0, "font_recognition": 50.0, "food_recognition": 100.0, "furniture_keypoint_detection": 55.0, "gaze_estimation": 10.0, "general_action_recognition": 95.0, "geometrical_perspective": 50.0, "geometrical_relativity": 30.0, "gesture_recognition": 65.0, "google_apps": 50.0, "gui_general": 45.0, "gui_install": 50.0, "handwritten_mathematical_expression_recognition": 90.0, "handwritten_retrieval": 90.0, "handwritten_text_recognition": 100.0, "health_medicine": 92.9, "helmet_anomaly_detection": 90.0, "human_interaction_understanding": 95.0, "human_keypoint_detection": 70.0, "human_object_interaction_recognition": 75.0, "humanitites_social_science": 54.5, "image2image_retrieval": 75.0, "image_based_action_recognition": 95.0, "image_captioning": 100.0, "image_captioning_paragraph": 95.0, "image_colorization": 60.0, "image_dense_captioning": 68.4, "image_matting": 15.0, "image_quality_assessment": 35.0, "image_season_recognition": 80.0, "industrial_produce_anomaly_detection": 40.0, "instance_captioning": 95.0, "interactive_segmentation": 85.7, "jigsaw_puzzle_solving": 40.0, "landmark_recognition": 100.0, "lesion_grading": 90.0, "logo_and_brand_recognition": 95.0, "lvlm_response_judgement": 45.0, "medical_modality_recognition": 100.0, "meme_image_understanding": 95.0, "meme_vedio_understanding": 80.0, "mevis": 30.0, "micro_expression_recognition": 20.0, "multiple_image_captioning": 95.0, "multiple_instance_captioning": 95.0, "multiple_view_image_understanding": 10.0, "muscial_instrument_recognition": 95.0, "national_flag_recognition": 100.0, "navigation": 90.0, "next_img_prediction": 65.0, "object_detection": 90.0, "one_shot_detection": 85.0, "order_hallucination": 50.0, "other_biological_attributes": 45.0, "painting_recognition": 90.0, "person_reid": 95.0, "pixel_localization": 25.0, "pixel_recognition": 55.0, "plant_recognition": 90.0, "point_tracking": 35.0, "polygon_localization": 40.0, "profession_recognition": 90.0, "ravens_progressive_matrices": 15.0, "reason_seg": 47.4, "referring_detection": 45.0, "relation_hallucination": 80.0, "religious_recognition": 75.0, "remote_sensing_object_detection": 60.0, "rock_recognition": 80.0, "rotated_object_detection": 77.8, "salient_object_detection_rgb": 55.0, "salient_object_detection_rgbd": 50.0, "scene_emotion_recognition": 65.0, "scene_graph_recognition": 85.0, "scene_recognition": 65.0, "scene_text_recognition": 90.0, "science": 58.3, "screenshot2code": 60.0, "sculpture_recognition": 80.0, "shape_recognition": 95.0, "sign_language_recognition": 40.0, "single_object_tracking": 65.0, "sketch2code": 50.0, "sketch2image_retrieval": 95.0, "small_object_detection": 60.0, "social_relation_recognition": 50.0, "som_recognition": 94.7, "sports_recognition": 95.0, "spot_the_diff": 10.0, "spot_the_similarity": 75.0, "table_structure_recognition": 50.0, "tech_engineering": 33.3, "temporal_anticipation": 75.0, "temporal_localization": 52.6, "temporal_ordering": 25.0, "temporal_sequence_understanding": 25.0, "text2image_retrieval": 55.0, "texture_material_recognition": 75.0, "threed_cad_recognition": 70.0, "threed_indoor_recognition": 45.0, "traffic_anomaly_detection": 55.0, "traffic_light_understanding": 100.0, "traffic_participants_understanding": 60.0, "traffic_sign_understanding": 95.0, "transparent_object_detection": 75.0, "vehicle_keypoint_detection": 55.6, "vehicle_recognition": 100.0, "vehicle_retrieval": 85.0, "video_captioning": 95.0, "visual_document_information_extraction": 95.0, "visual_prompt_understanding": 75.0, "waste_recognition": 100.0, "weapon_recognition": 100.0, "weather_recognition": 100.0, "web_shopping": 40.0, "whoops": 80.0, "writing_poetry_from_image": 60.0 }, "BLINK": { "Overall": 68.0, "Art_Style": 82.9, "Counting": 66.7, "Forensic_Detection": 90.9, "Functional_Correspondence": 43.1, "IQ_Test": 32.0, "Jigsaw": 76.7, "Multi-view_Reasoning": 58.6, "Object_Localization": 69.7, "Relative_Depth": 75.8, "Relative_Reflectance": 32.8, "Semantic_Correspondence": 61.2, "Spatial_Relation": 83.2, "Visual_Correspondence": 92.4, "Visual_Similarity": 83.0 }, "QBench": { "Overall": 78.9, "type_0_concern_0": 82.4, "type_0_concern_1": 82.3, "type_0_concern_2": 81.2, "type_0_concern_3": 87.1, "type_1_concern_0": 76.7, "type_1_concern_1": 84.8, "type_1_concern_2": 87.0, "type_1_concern_3": 88.9, "type_2_concern_0": 66.5, "type_2_concern_1": 72.4, "type_2_concern_2": 66.7, "type_2_concern_3": 80.0 }, "ABench": { "Overall": 79.2, "part1 -> bag_of_words -> attribute": 92.7, "part1 -> bag_of_words -> composition -> arrangement": 86.7, "part1 -> bag_of_words -> composition -> occlusion": 60.0, "part1 -> bag_of_words -> composition -> orientation": 76.9, "part1 -> bag_of_words -> composition -> size": 71.4, "part1 -> bag_of_words -> counting": 79.6, "part1 -> bag_of_words -> noun_as_adjective": 81.4, "part1 -> basic_recognition -> major": 92.9, "part1 -> basic_recognition -> minor": 93.2, "part1 -> outside_knowledge -> contradiction overcome": 70.8, "part1 -> outside_knowledge -> specific-terms -> company": 100.0, "part1 -> outside_knowledge -> specific-terms -> creature": 83.3, "part1 -> outside_knowledge -> specific-terms -> daily": 94.1, "part1 -> outside_knowledge -> specific-terms -> food": 95.5, "part1 -> outside_knowledge -> specific-terms -> geography": 81.0, "part1 -> outside_knowledge -> specific-terms -> material": 95.2, "part1 -> outside_knowledge -> specific-terms -> science": 100.0, "part1 -> outside_knowledge -> specific-terms -> sports": 68.2, "part1 -> outside_knowledge -> specific-terms -> style -> abstract": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> art": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> art_deco": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> cubism": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> dadaism": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> deco": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> expressionism": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> fauvism": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> futurism": 66.7, "part1 -> outside_knowledge -> specific-terms -> style -> minimalism": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> pop": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> psychedelic": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> steampunk": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> surrealism": 100.0, "part1 -> outside_knowledge -> specific-terms -> style -> victorian": 0.0, "part1 -> outside_knowledge -> specific-terms -> vehicle": 94.7, "part1 -> outside_knowledge -> specific-terms -> weather": 92.3, "part2 -> aesthetic": 62.6, "part2 -> generative": 72.4, "part2 -> technical": 74.9 }, "MTVQA": { "Overall": 31.2, "AR": 21.3, "DE": 35.1, "FR": 42.2, "IT": 37.2, "JA": 19.9, "KR": 35.1, "RU": 15.9, "TH": 26.0, "VI": 39.6 } } } }