ytaek-oh committed on
Commit
4946c0b
·
1 Parent(s): 6b607a9

add results

Browse files
data/250126/decoder_overall_invalid.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ family,model,tag,add_relation_XS,add_relation_S,add_relation_M,add_relation_L,add_attribute_XS,add_attribute_S,add_attribute_M,add_attribute_L,Avg
2
+ vqascore,instructblip-flant5-xl,none,60.0,61.029411764705884,50.0,50.0,40.909090909090914,47.368421052631575,50.0,50.0,51.163365465803544
3
+ vqascore,clip-flant5-xl,none,62.5,49.26470588235294,50.0,37.93103448275862,65.15151515151516,46.05263157894737,52.67857142857143,45.0,51.07230731551819
4
+ vqascore,llava-v1.5-7b,none,75.0,51.470588235294116,46.52777777777777,55.172413793103445,59.09090909090909,67.10526315789474,44.64285714285714,40.0,54.87622614972955
5
+ vqascore,sharegpt4v-7b,none,62.5,58.08823529411765,55.55555555555556,75.86206896551724,58.33333333333333,56.578947368421055,55.357142857142854,50.0,59.03441042176096
data/250126/decoder_overall_valid.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ family,model,tag,add_relation_XS,add_relation_S,add_relation_M,add_relation_L,add_attribute_XS,add_attribute_S,add_attribute_M,add_attribute_L,Avg
2
+ vqascore,instructblip-flant5-xl,none,61.42857142857143,60.858585858585855,50.0,50.0,57.758620689655174,48.701298701298704,50.0,50.0,53.593384584763896
3
+ vqascore,clip-flant5-xl,none,47.14285714285714,50.0,44.29657794676806,34.112149532710276,55.603448275862064,55.1948051948052,59.772727272727266,57.14285714285714,50.40817781357339
4
+ vqascore,llava-v1.5-7b,none,59.28571428571429,49.747474747474755,51.33079847908745,57.47663551401868,59.05172413793103,51.2987012987013,60.90909090909091,57.14285714285714,55.780374564359434
5
+ vqascore,sharegpt4v-7b,none,50.71428571428571,49.494949494949495,61.78707224334601,66.82242990654206,59.48275862068966,48.051948051948045,60.45454545454545,67.14285714285714,57.99385582864545
data/250126/decoder_summary_invalid.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ "('Model', 'family')","('Model', 'model')","('Model', 'tag')","('length_group', 'XS')","('length_group', 'S')","('length_group', 'M')","('length_group', 'L')","('neg_type', 'relation')","('neg_type', 'attribute')","('Avg', 'Avg')"
2
+ vqascore,instructblip-flant5-xl,none,50.45454545454545,54.19891640866873,50.0,50.0,55.25735294117647,47.069377990430624,51.163365465803544
3
+ vqascore,clip-flant5-xl,none,63.82575757575758,47.658668730650156,51.339285714285715,41.46551724137931,49.92393509127788,52.22067953975849,51.07230731551819
4
+ vqascore,llava-v1.5-7b,none,67.04545454545455,59.28792569659443,45.585317460317455,47.58620689655172,57.04269495154384,52.70975734791524,54.87622614972955
5
+ vqascore,sharegpt4v-7b,none,60.416666666666664,57.33359133126935,55.4563492063492,62.93103448275862,63.001464953797615,55.06735588972431,59.03441042176096
data/250126/decoder_summary_valid.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ "('Model', 'family')","('Model', 'model')","('Model', 'tag')","('length_group', 'XS')","('length_group', 'S')","('length_group', 'M')","('length_group', 'L')","('neg_type', 'relation')","('neg_type', 'attribute')","('Avg', 'Avg')"
2
+ vqascore,instructblip-flant5-xl,none,59.5935960591133,54.779942279942276,50.0,50.0,55.571789321789325,51.61497984773847,53.593384584763896
3
+ vqascore,clip-flant5-xl,none,51.3731527093596,52.5974025974026,52.03465260974767,45.62750333778371,43.88789615558387,56.92845947156292,50.40817781357339
4
+ vqascore,llava-v1.5-7b,none,59.16871921182266,50.52308802308803,56.11994469408918,57.30974632843791,54.46015575657379,57.10059337214509,55.780374564359434
5
+ vqascore,sharegpt4v-7b,none,55.09852216748769,48.77344877344877,61.12080884894573,66.9826435246996,57.20468433978082,58.78302731751007,57.99385582864545
data/250126/overall.csv ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ family,model,tag,add_relation_XS,add_relation_S,add_relation_M,add_relation_L,add_attribute_XS,add_attribute_S,add_attribute_M,add_attribute_L,Avg,add_relation_XS,add_relation_S,add_relation_M,add_relation_L,add_attribute_XS,add_attribute_S,add_attribute_M,add_attribute_L,Avg
2
+ ALIGN,align-base,coyo700m,55.0,50.0,50.0,50.0,64.4396551724138,50.0,50.0,50.0,52.42995689655172,62.5,50.0,50.0,50.0,67.42424242424242,50.0,50.0,50.0,53.740530303030305
3
+ CLIPS,CLIPS-Large-14-224,recap-datacomp1b,58.57142857142857,50.0,50.0,50.0,62.93103448275862,50.64935064935065,50.0,50.0,52.76897671294223,72.5,50.0,50.0,50.0,61.36363636363637,50.0,50.0,50.0,54.23295454545455
4
+ CLIPS,CLIPS-Large-14-336,recap-datacomp1b,54.28571428571429,50.0,50.0,50.0,61.20689655172414,50.64935064935065,50.0,50.0,52.01774518584863,72.5,48.529411764705884,50.0,50.0,58.333333333333336,50.0,50.0,50.0,53.6703431372549
5
+ CLIPS,CLIPS-Huge-14-224,recap-datacomp1b,51.42857142857143,50.0,50.0,50.0,66.8103448275862,49.35064935064935,50.0,50.0,52.19869570085088,57.5,48.529411764705884,50.0,50.0,68.93939393939394,50.0,50.0,50.0,53.12110071301248
6
+ DreamLIP,dreamlip-vitb16,cc3m-long,60.0,50.0,50.0,50.0,59.91379310344828,50.0,50.0,50.0,52.48922413793103,55.00000000000001,50.0,50.0,50.0,56.81818181818181,50.0,50.0,50.0,51.47727272727273
7
+ DreamLIP,dreamlip-vitb16,cc12m-long,48.57142857142857,50.0,50.0,50.0,57.758620689655174,50.0,50.0,50.0,50.791256157635466,75.0,50.0,50.0,50.0,65.9090909090909,50.0,50.0,50.0,55.11363636363636
8
+ DreamLIP,dreamlip-vitb16,yfcc15m-long,50.0,50.0,50.0,50.0,60.34482758620689,50.0,50.0,50.0,51.29310344827586,65.0,50.0,50.0,50.0,65.9090909090909,50.0,50.0,50.0,53.86363636363636
9
+ DreamLIP,dreamlip-vitb16,cc30m-long,52.85714285714286,50.0,50.0,50.0,67.67241379310346,50.0,50.0,50.0,52.56619458128079,75.0,50.0,50.0,50.0,70.45454545454547,50.0,50.0,50.0,55.68181818181819
10
+ FLAIR,flair-vitb16,cc3m-recap,64.28571428571428,50.0,50.0,50.0,67.67241379310346,50.0,50.0,50.0,53.99476600985221,60.0,50.0,50.0,50.0,59.84848484848485,50.0,50.0,50.0,52.48106060606061
11
+ FLAIR,flair-vitb16,cc12m-recap,54.28571428571428,50.0,50.0,50.0,66.37931034482759,50.0,50.0,50.0,52.58312807881774,70.0,50.0,50.0,50.0,65.9090909090909,50.0,50.0,50.0,54.48863636363636
12
+ FLAIR,flair-vitb16,yfcc15m-recap,60.0,50.0,50.0,50.0,62.93103448275862,50.0,50.0,50.0,52.866379310344826,70.0,50.0,50.0,50.0,64.39393939393939,50.0,50.0,50.0,54.29924242424242
13
+ FLAIR,flair-vitb16,cc30m-recap,54.28571428571428,50.0,50.0,50.0,67.67241379310346,50.0,50.0,50.0,52.74476600985221,70.0,50.0,50.0,50.0,64.39393939393939,50.0,50.0,50.0,54.29924242424242
14
+ FSC-CLIP,fsc-clip-ViT-B-32,laioncoco-ft,55.71428571428572,50.0,50.0,50.0,62.06896551724138,50.0,50.0,50.0,52.22290640394089,80.0,50.0,50.0,50.0,58.333333333333336,50.0,50.0,50.0,54.791666666666664
15
+ FSC-CLIP,fsc-clip-ViT-B-16,laioncoco-ft,54.28571428571428,50.0,50.0,50.0,60.34482758620689,50.0,50.0,50.0,51.82881773399015,75.0,50.0,50.0,50.0,56.81818181818181,50.0,50.0,50.0,53.97727272727273
16
+ FSC-CLIP,fsc-clip-ViT-L-14,laioncoco-ft,60.0,50.0,50.0,50.0,60.77586206896552,50.0,50.0,50.0,52.59698275862069,85.0,50.0,50.0,50.0,50.75757575757576,50.0,50.0,50.0,54.46969696969697
17
+ Jina-CLIP,jina-clip-v1,jinaai,57.14285714285714,54.04040404040404,53.61216730038023,51.4018691588785,68.10344827586206,62.33766233766234,59.09090909090909,71.42857142857143,59.644736096940605,75.0,58.82352941176471,54.166666666666664,58.620689655172406,60.60606060606061,68.42105263157895,64.28571428571429,50.0,61.2404641571197
18
+ Jina-CLIP,jina-clip-v2,jinaai,60.714285714285715,61.868686868686865,52.47148288973384,52.80373831775701,66.8103448275862,62.98701298701299,62.5,60.0,60.01944395063283,70.0,67.64705882352942,48.611111111111114,51.72413793103448,68.18181818181819,72.36842105263159,58.03571428571429,40.0,59.57103267322989
19
+ LoTLIP,LoTLIP-ViT-B-32,lotlip100m,65.71428571428571,47.727272727272734,50.0,50.0,65.08620689655173,55.19480519480519,50.22727272727273,50.0,54.243730407523515,70.0,55.88235294117648,50.0,50.0,72.72727272727273,68.42105263157896,50.0,50.0,58.37883478750352
20
+ LoTLIP,LoTLIP-ViT-B-16,lotlip100m,54.285714285714285,52.272727272727266,50.0,50.0,65.94827586206897,52.59740259740259,49.77272727272727,50.0,53.10960591133005,45.0,42.647058823529406,50.0,50.0,66.66666666666666,47.368421052631575,50.0,50.0,50.210268317853455
21
+ LongCLIP,longclip-vitb32,sharegpt4v-1m,51.42857142857143,50.505050505050505,46.00760456273764,50.0,53.66379310344827,51.94805194805194,60.0,50.0,51.69413394348248,37.5,59.55882352941177,60.41666666666667,50.0,56.81818181818181,65.78947368421052,62.5,50.0,55.32289321230885
22
+ LongCLIP,longclip-vitb16,sharegpt4v-1m,47.14285714285714,52.02020202020202,47.338403041825096,50.0,57.54310344827586,57.467532467532465,60.227272727272734,50.0,52.71742135599567,65.0,52.205882352941174,45.138888888888886,50.0,50.75757575757575,61.84210526315789,61.60714285714286,50.0,54.56894938996332
23
+ LongCLIP,longclip-vitl14,sharegpt4v-1m,58.57142857142857,61.868686868686865,56.083650190114064,50.0,56.25,58.76623376623377,55.90909090909091,50.0,55.93113628819427,70.0,65.44117647058823,50.69444444444444,50.0,59.09090909090909,55.26315789473684,61.607142857142854,50.0,57.762103844727676
24
+ LongCLIP,longclip-vitl14_336px,sharegpt4v-1m,60.0,59.84848484848485,54.37262357414449,50.0,54.956896551724135,52.92207792207792,53.63636363636364,50.0,54.46705581659938,70.0,65.44117647058823,50.0,50.0,48.484848484848484,76.31578947368422,58.035714285714285,50.0,58.5346910893544
25
+ OpenCLIP,roberta-ViT-B-32,laion2b_s12b_b32k,68.57142857142857,50.0,50.0,50.0,64.87068965517241,50.0,50.0,50.0,54.180264778325125,75.0,50.0,50.0,50.0,55.3030303030303,50.0,50.0,50.0,53.78787878787879
26
+ OpenCLIP,coca_ViT-B-32,laion2b_s13b_b90k,65.71428571428572,50.0,50.0,50.0,64.22413793103448,50.0,50.0,50.0,53.742302955665025,72.5,50.0,50.0,50.0,59.09090909090909,50.0,50.0,50.0,53.94886363636364
27
+ OpenCLIP,coca_ViT-L-14,laion2b_s13b_b90k,60.0,50.0,50.0,50.0,61.20689655172414,50.0,50.0,50.0,52.650862068965516,57.5,50.0,50.0,50.0,62.121212121212125,50.0,50.0,50.0,52.452651515151516
28
+ OpenCLIP,ViT-H-14,laion2b_s32b_b79k,67.14285714285714,50.0,50.0,50.0,65.94827586206897,50.0,50.0,50.0,54.136391625615765,70.0,50.0,50.0,50.0,64.39393939393939,50.0,50.0,50.0,54.29924242424242
29
+ OpenCLIP,ViT-L-14,laion2b_s32b_b82k,60.0,50.0,50.0,50.0,59.91379310344828,50.0,50.0,50.0,52.48922413793103,65.0,50.0,50.0,50.0,64.39393939393939,50.0,50.0,50.0,53.67424242424242
30
+ OpenCLIP,ViT-B-32,laion2b_s34b_b79k,55.71428571428572,50.0,50.0,50.0,64.22413793103448,50.0,50.0,50.0,52.492302955665025,70.0,50.0,50.0,50.0,61.36363636363637,50.0,50.0,50.0,53.92045454545455
31
+ OpenCLIP,ViT-B-16,laion2b_s34b_b88k,60.0,50.0,50.0,50.0,58.189655172413794,50.0,50.0,50.0,52.27370689655172,75.0,50.0,50.0,50.0,61.36363636363637,50.0,50.0,50.0,54.54545454545455
32
+ OpenCLIP,ViT-g-14,laion2b_s34b_b88k,55.71428571428572,50.0,50.0,50.0,61.63793103448276,50.0,50.0,50.0,52.16902709359606,80.0,50.0,50.0,50.0,58.333333333333336,50.0,50.0,50.0,54.791666666666664
33
+ OpenCLIP,ViT-B-16,openai,58.57142857142857,50.0,50.0,50.0,59.48275862068965,50.0,50.0,50.0,52.256773399014776,70.0,50.0,50.0,50.0,58.333333333333336,50.0,50.0,50.0,53.541666666666664
34
+ OpenCLIP,ViT-B-32,openai,61.42857142857143,50.0,50.0,50.0,60.34482758620689,50.0,50.0,50.0,52.72167487684729,65.0,50.0,50.0,50.0,56.81818181818181,50.0,50.0,50.0,52.72727272727273
35
+ OpenCLIP,ViT-L-14,openai,57.14285714285714,50.0,50.0,50.0,61.20689655172414,50.0,50.0,50.0,52.29371921182266,65.0,50.0,50.0,50.0,47.72727272727273,50.0,50.0,50.0,51.59090909090909
36
+ OpenCLIP,ViT-L-14-336,openai,60.0,50.0,50.0,50.0,62.93103448275862,50.0,50.0,50.0,52.866379310344826,65.0,50.0,50.0,50.0,52.27272727272727,50.0,50.0,50.0,52.15909090909091
37
+ OpenCLIP,ViT-B-16-SigLIP,webli,50.714285714285715,50.0,50.0,50.0,60.34482758620689,50.0,50.0,50.0,51.38238916256158,47.5,50.0,50.0,50.0,62.121212121212125,50.0,50.0,50.0,51.202651515151516
38
+ OpenCLIP,ViT-B-16-SigLIP-384,webli,50.714285714285715,50.0,50.0,50.0,60.34482758620689,50.0,50.0,50.0,51.38238916256158,47.5,50.0,50.0,50.0,60.60606060606061,50.0,50.0,50.0,51.01325757575758
39
+ OpenCLIP,ViT-L-16-SigLIP-256,webli,55.0,50.0,50.0,50.0,60.77586206896551,50.0,50.0,50.0,51.97198275862069,52.5,50.0,50.0,50.0,59.09090909090909,50.0,50.0,50.0,51.44886363636364
40
+ OpenCLIP,ViT-L-16-SigLIP-384,webli,49.285714285714285,50.0,50.0,50.0,60.77586206896551,50.0,50.0,50.0,51.257697044334975,62.5,50.0,50.0,50.0,59.09090909090909,50.0,50.0,50.0,52.69886363636364
41
+ OpenCLIP,ViT-SO400M-14-SigLIP,webli,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
42
+ Recap-CLIP,ViT-L-16-HTxt-Recap-CLIP,recap-datacomp1b,52.85714285714286,53.03030303030303,50.0,50.0,63.793103448275865,58.44155844155844,50.22727272727273,50.0,53.54367256306912,85.0,52.205882352941174,50.0,50.0,63.63636363636363,55.26315789473684,50.0,50.0,57.0131754855052
43
+ StructuredCLIP,NegCLIP-ViT-B-32,coco-ft,71.42857142857143,50.0,50.0,50.0,57.758620689655174,50.0,50.0,50.0,53.64839901477833,70.0,50.0,50.0,50.0,59.84848484848485,50.0,50.0,50.0,53.73106060606061
44
+ StructuredCLIP,CE-CLIP-ViT-B-32,coco-ft,68.57142857142857,50.0,50.0,50.0,59.48275862068965,50.0,50.0,50.0,53.506773399014776,70.0,50.0,50.0,50.0,67.42424242424241,50.0,50.0,50.0,54.6780303030303
45
+ StructuredCLIP,DAC-LLM-ViT-B-32,cc3m-ft,64.28571428571428,50.0,50.0,50.0,59.91379310344828,50.0,50.0,50.0,53.02493842364532,85.0,50.0,50.0,50.0,62.878787878787875,50.0,50.0,50.0,55.984848484848484
46
+ StructuredCLIP,DAC-SAM-ViT-B-32,cc3m-ft,60.0,50.0,50.0,50.0,53.01724137931035,50.0,50.0,50.0,51.627155172413794,75.0,50.0,50.0,50.0,58.333333333333336,50.0,50.0,50.0,54.166666666666664
data/250126/summary.csv ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "('Model', 'family')","('Model', 'model')","('Model', 'tag')","('length_group', 'XS')","('length_group', 'S')","('length_group', 'M')","('length_group', 'L')","('neg_type', 'relation')","('neg_type', 'attribute')","('Avg', 'Avg')","('length_group.1', 'XS.1')","('length_group.1', 'S.1')","('length_group.1', 'M.1')","('length_group.1', 'L.1')","('neg_type.1', 'relation.1')","('neg_type.1', 'attribute.1')","('Avg.1', 'Avg.1')"
2
+ ALIGN,align-base,coyo700m,59.71982759,50,50,50,51.25,53.60991379,52.4299569,64.96212121,50,50,50,53.125,54.35606061,53.7405303
3
+ CLIPS,CLIPS-Large-14-224,recap-datacomp1b,60.75123153,50.32467532,50,50,52.14285714,53.39509628,52.76897671,66.93181818,50,50,50,55.625,52.84090909,54.23295455
4
+ CLIPS,CLIPS-Large-14-336,recap-datacomp1b,57.74630542,50.32467532,50,50,51.07142857,52.9640618,52.01774519,65.41666667,49.26470588,50,50,55.25735294,52.08333333,53.67034314
5
+ CLIPS,CLIPS-Huge-14-224,recap-datacomp1b,59.11945813,49.67532468,50,50,50.35714286,54.04024854,52.1986957,63.21969697,49.26470588,50,50,51.50735294,54.73484848,53.12110071
6
+ DreamLIP,dreamlip-vitb16,cc3m-long,59.95689655,50,50,50,52.5,52.47844828,52.48922414,55.90909091,50,50,50,51.25,51.70454545,51.47727273
7
+ DreamLIP,dreamlip-vitb16,cc12m-long,53.16502463,50,50,50,49.64285714,51.93965517,50.79125616,70.45454545,50,50,50,56.25,53.97727273,55.11363636
8
+ DreamLIP,dreamlip-vitb16,yfcc15m-long,55.17241379,50,50,50,50,52.5862069,51.29310345,65.45454545,50,50,50,53.75,53.97727273,53.86363636
9
+ DreamLIP,dreamlip-vitb16,cc30m-long,60.26477833,50,50,50,50.71428571,54.41810345,52.56619458,72.72727273,50,50,50,56.25,55.11363636,55.68181818
10
+ FLAIR,flair-vitb16,cc3m-recap,65.97906404,50,50,50,53.57142857,54.41810345,53.99476601,59.92424242,50,50,50,52.5,52.46212121,52.48106061
11
+ FLAIR,flair-vitb16,cc12m-recap,60.33251232,50,50,50,51.07142857,54.09482759,52.58312808,67.95454545,50,50,50,55,53.97727273,54.48863636
12
+ FLAIR,flair-vitb16,yfcc15m-recap,61.46551724,50,50,50,52.5,53.23275862,52.86637931,67.1969697,50,50,50,55,53.59848485,54.29924242
13
+ FLAIR,flair-vitb16,cc30m-recap,60.97906404,50,50,50,51.07142857,54.41810345,52.74476601,67.1969697,50,50,50,55,53.59848485,54.29924242
14
+ FSC-CLIP,fsc-clip-ViT-B-32,laioncoco-ft,58.89162562,50,50,50,51.42857143,53.01724138,52.2229064,69.16666667,50,50,50,57.5,52.08333333,54.79166667
15
+ FSC-CLIP,fsc-clip-ViT-B-16,laioncoco-ft,57.31527094,50,50,50,51.07142857,52.5862069,51.82881773,65.90909091,50,50,50,56.25,51.70454545,53.97727273
16
+ FSC-CLIP,fsc-clip-ViT-L-14,laioncoco-ft,60.38793103,50,50,50,52.5,52.69396552,52.59698276,67.87878788,50,50,50,58.75,50.18939394,54.46969697
17
+ Jina-CLIP,jina-clip-v1,jinaai,62.62315271,58.18903319,56.3515382,61.41522029,54.04932441,65.24014778,59.6447361,67.8030303,63.62229102,59.22619048,54.31034483,61.65272143,60.82820688,61.24046416
18
+ Jina-CLIP,jina-clip-v2,jinaai,63.76231527,62.42784993,57.48574144,56.40186916,56.96454845,63.07433945,60.01944395,69.09090909,70.00773994,53.3234127,45.86206897,59.49557697,59.64648838,59.57103267
19
+ LoTLIP,LoTLIP-ViT-B-32,lotlip100m,65.40024631,51.46103896,50.11363636,50,53.36038961,55.1270712,54.24373041,71.36363636,62.15170279,50,50,56.47058824,60.28708134,58.37883479
20
+ LoTLIP,LoTLIP-ViT-B-16,lotlip100m,60.11699507,52.43506494,49.88636364,50,51.63961039,54.57960143,53.10960591,55.83333333,45.00773994,50,50,46.91176471,53.50877193,50.21026832
21
+ LongCLIP,longclip-vitb32,sharegpt4v-1m,52.54618227,51.22655123,53.00380228,50,49.48530662,53.90296126,51.69413394,47.15909091,62.67414861,61.45833333,50,51.86887255,58.77691388,55.32289321
22
+ LongCLIP,longclip-vitb16,sharegpt4v-1m,52.3429803,54.74386724,53.78283788,50,49.12536555,56.30947716,52.71742136,57.87878788,57.02399381,53.37301587,50,53.08619281,56.05170597,54.56894939
23
+ LongCLIP,longclip-vitl14,sharegpt4v-1m,57.41071429,60.31746032,55.99637055,50,56.63094141,55.23133117,55.93113629,64.54545455,60.35216718,56.15079365,50,59.03390523,56.49030246,57.76210384
24
+ LongCLIP,longclip-vitl14_336px,sharegpt4v-1m,57.47844828,56.38528139,54.00449361,50,56.05527711,52.87883453,54.46705582,59.24242424,70.87848297,54.01785714,50,58.86029412,58.20908806,58.53469109
25
+ OpenCLIP,roberta-ViT-B-32,laion2b_s12b_b32k,66.72105911,50,50,50,54.64285714,53.71767241,54.18026478,65.15151515,50,50,50,56.25,51.32575758,53.78787879
26
+ OpenCLIP,coca_ViT-B-32,laion2b_s13b_b90k,64.96921182,50,50,50,53.92857143,53.55603448,53.74230296,65.79545455,50,50,50,55.625,52.27272727,53.94886364
27
+ OpenCLIP,coca_ViT-L-14,laion2b_s13b_b90k,60.60344828,50,50,50,52.5,52.80172414,52.65086207,59.81060606,50,50,50,51.875,53.03030303,52.45265152
28
+ OpenCLIP,ViT-H-14,laion2b_s32b_b79k,66.5455665,50,50,50,54.28571429,53.98706897,54.13639163,67.1969697,50,50,50,55,53.59848485,54.29924242
29
+ OpenCLIP,ViT-L-14,laion2b_s32b_b82k,59.95689655,50,50,50,52.5,52.47844828,52.48922414,64.6969697,50,50,50,53.75,53.59848485,53.67424242
30
+ OpenCLIP,ViT-B-32,laion2b_s34b_b79k,59.96921182,50,50,50,51.42857143,53.55603448,52.49230296,65.68181818,50,50,50,55,52.84090909,53.92045455
31
+ OpenCLIP,ViT-B-16,laion2b_s34b_b88k,59.09482759,50,50,50,52.5,52.04741379,52.2737069,68.18181818,50,50,50,56.25,52.84090909,54.54545455
32
+ OpenCLIP,ViT-g-14,laion2b_s34b_b88k,58.67610837,50,50,50,51.42857143,52.90948276,52.16902709,69.16666667,50,50,50,57.5,52.08333333,54.79166667
33
+ OpenCLIP,ViT-B-16,openai,59.0270936,50,50,50,52.14285714,52.37068966,52.2567734,64.16666667,50,50,50,55,52.08333333,53.54166667
34
+ OpenCLIP,ViT-B-32,openai,60.88669951,50,50,50,52.85714286,52.5862069,52.72167488,60.90909091,50,50,50,53.75,51.70454545,52.72727273
35
+ OpenCLIP,ViT-L-14,openai,59.17487685,50,50,50,51.78571429,52.80172414,52.29371921,56.36363636,50,50,50,53.75,49.43181818,51.59090909
36
+ OpenCLIP,ViT-L-14-336,openai,61.46551724,50,50,50,52.5,53.23275862,52.86637931,58.63636364,50,50,50,53.75,50.56818182,52.15909091
37
+ OpenCLIP,ViT-B-16-SigLIP,webli,55.52955665,50,50,50,50.17857143,52.5862069,51.38238916,54.81060606,50,50,50,49.375,53.03030303,51.20265152
38
+ OpenCLIP,ViT-B-16-SigLIP-384,webli,55.52955665,50,50,50,50.17857143,52.5862069,51.38238916,54.0530303,50,50,50,49.375,52.65151515,51.01325758
39
+ OpenCLIP,ViT-L-16-SigLIP-256,webli,57.88793103,50,50,50,51.25,52.69396552,51.97198276,55.79545455,50,50,50,50.625,52.27272727,51.44886364
40
+ OpenCLIP,ViT-L-16-SigLIP-384,webli,55.03078818,50,50,50,49.82142857,52.69396552,51.25769704,60.79545455,50,50,50,53.125,52.27272727,52.69886364
41
+ OpenCLIP,ViT-SO400M-14-SigLIP,webli,50,50,50,50,50,50,50,50,50,50,50,50,50,50
42
+ Recap-CLIP,ViT-L-16-HTxt-Recap-CLIP,recap-datacomp1b,58.32512315,55.73593074,50.11363636,50,51.47186147,55.61548365,53.54367256,74.31818182,53.73452012,50,50,59.30147059,54.72488038,57.01317549
43
+ StructuredCLIP,NegCLIP-ViT-B-32,coco-ft,64.59359606,50,50,50,55.35714286,51.93965517,53.64839901,64.92424242,50,50,50,55,52.46212121,53.73106061
44
+ StructuredCLIP,CE-CLIP-ViT-B-32,coco-ft,64.0270936,50,50,50,54.64285714,52.37068966,53.5067734,68.71212121,50,50,50,55,54.35606061,54.6780303
45
+ StructuredCLIP,DAC-LLM-ViT-B-32,cc3m-ft,62.09975369,50,50,50,53.57142857,52.47844828,53.02493842,73.93939394,50,50,50,58.75,53.21969697,55.98484848
46
+ StructuredCLIP,DAC-SAM-ViT-B-32,cc3m-ft,56.50862069,50,50,50,52.5,50.75431034,51.62715517,66.66666667,50,50,50,56.25,52.08333333,54.16666667
pages/overall_acc_250126.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ import streamlit as st
4
+
5
+ st.set_page_config(layout="wide")
6
# Every entry below is a "<family>:<model>:<tag>" key; rows of the results
# CSVs are matched against these keys when the tables are built.

# Models shown under the "short_captions" heading.
SHORT_CAPTIONS = [
    "ALIGN:align-base:coyo700m",
    "OpenCLIP:ViT-B-32:openai",
    "OpenCLIP:ViT-B-16:openai",
    "OpenCLIP:ViT-L-14:openai",
    "OpenCLIP:ViT-L-14-336:openai",
    "OpenCLIP:ViT-B-32:laion2b_s34b_b79k",
    "OpenCLIP:ViT-B-16:laion2b_s34b_b88k",
    "OpenCLIP:ViT-L-14:laion2b_s32b_b82k",
    "OpenCLIP:ViT-g-14:laion2b_s34b_b88k",
    "OpenCLIP:ViT-H-14:laion2b_s32b_b79k",
    "OpenCLIP:roberta-ViT-B-32:laion2b_s12b_b32k",
    "OpenCLIP:ViT-B-16-SigLIP:webli",
    "OpenCLIP:ViT-B-16-SigLIP-384:webli",
    "OpenCLIP:ViT-L-16-SigLIP-256:webli",
    "OpenCLIP:ViT-L-16-SigLIP-384:webli",
    "OpenCLIP:ViT-SO400M-14-SigLIP:webli",
    "OpenCLIP:coca_ViT-B-32:laion2b_s13b_b90k",
    "OpenCLIP:coca_ViT-L-14:laion2b_s13b_b90k",
]

# Models shown under the "long_captions" heading.
LONG_CAPTIONS = [
    "DreamLIP:dreamlip-vitb16:cc3m-long",
    "DreamLIP:dreamlip-vitb16:cc12m-long",
    "DreamLIP:dreamlip-vitb16:yfcc15m-long",
    "DreamLIP:dreamlip-vitb16:cc30m-long",
    "FLAIR:flair-vitb16:cc3m-recap",
    "FLAIR:flair-vitb16:cc12m-recap",
    "FLAIR:flair-vitb16:yfcc15m-recap",
    "FLAIR:flair-vitb16:cc30m-recap",
    "CLIPS:CLIPS-Large-14-224:recap-datacomp1b",
    "CLIPS:CLIPS-Large-14-336:recap-datacomp1b",
    "CLIPS:CLIPS-Huge-14-224:recap-datacomp1b",
    "LoTLIP:LoTLIP-ViT-B-32:lotlip100m",
    "LoTLIP:LoTLIP-ViT-B-16:lotlip100m",
    "Recap-CLIP:ViT-L-16-HTxt-Recap-CLIP:recap-datacomp1b",
    "LongCLIP:longclip-vitb32:sharegpt4v-1m",
    "LongCLIP:longclip-vitb16:sharegpt4v-1m",
    "LongCLIP:longclip-vitl14:sharegpt4v-1m",
    "LongCLIP:longclip-vitl14_336px:sharegpt4v-1m",
    "Jina-CLIP:jina-clip-v1:jinaai",
    "Jina-CLIP:jina-clip-v2:jinaai",
]

# Models shown under the "compositionality" heading.
COMPOSITIONALITY = [
    "OpenCLIP:ViT-B-32:openai",
    "StructuredCLIP:NegCLIP-ViT-B-32:coco-ft",
    "StructuredCLIP:CE-CLIP-ViT-B-32:coco-ft",
    "StructuredCLIP:DAC-LLM-ViT-B-32:cc3m-ft",
    "StructuredCLIP:DAC-SAM-ViT-B-32:cc3m-ft",
    "FSC-CLIP:fsc-clip-ViT-B-32:laioncoco-ft",
    "FSC-CLIP:fsc-clip-ViT-B-16:laioncoco-ft",
    "FSC-CLIP:fsc-clip-ViT-L-14:laioncoco-ft",
]

# Decoder-based scorers; deliberately kept out of MODEL_GROUPS because they
# are looked up in their own decoder_* CSV files.
DECODERS = [
    "vqascore:instructblip-flant5-xl:none",
    "vqascore:clip-flant5-xl:none",
    "vqascore:llava-v1.5-7b:none",
    "vqascore:sharegpt4v-7b:none",
    "visualgptscore:instructblip-flant5-xl:none",
    "visualgptscore:clip-flant5-xl:none",
    "visualgptscore:llava-v1.5-7b:none",
    "visualgptscore:sharegpt4v-7b:none",
]

# Section name -> model keys, in the order the sections are rendered.
MODEL_GROUPS = {
    "short_captions": SHORT_CAPTIONS,
    "long_captions": LONG_CAPTIONS,
    "compositionality": COMPOSITIONALITY,
}
48
+
49
+
50
+ def format_df(df):
51
+ cols = []
52
+ for col in df.columns:
53
+ if col in ["family", "model", "tag"]:
54
+ continue
55
+ cols.append(col)
56
+ formatted_df = df.style.format({col: "{:.1f}" for col in cols})
57
+ return formatted_df
58
+
59
+
60
def print_table_overall(df, model_names):
    """Show the rows of ``df`` listed in ``model_names`` as a Streamlit table.

    Each row is identified by the key ``"<family>:<model>:<tag>"``.  Rows are
    emitted in the order of ``model_names``; names with no matching row are
    skipped silently.

    Args:
        df: DataFrame containing ``family``, ``model`` and ``tag`` columns
            plus the score columns to display.
        model_names: Iterable of ``"family:model:tag"`` keys to display.
    """
    # Cast to str first: ":".join() raises TypeError on NaN or numeric cells,
    # which read_csv produces for empty fields or all-digit tags.
    keys = (
        df["family"].astype(str)
        + ":" + df["model"].astype(str)
        + ":" + df["tag"].astype(str)
    )
    selected = [df[keys == name] for name in model_names]
    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # frame that still carries the column headers.
    table = pd.concat(selected, axis=0) if selected else df.iloc[:0]
    st.table(format_df(table))
67
+
68
+
69
# Streamlit app
def main():
    """Render the page: one table per MODEL_GROUPS entry, then the decoder tables."""
    st.title("Interface")

    # NOTE: the header of overall.csv repeats its metric column names; when
    # read_csv de-duplicates them, the second set carries a ".1" suffix.
    encoder_df = pd.read_csv("data/250126/overall.csv")
    for group, model_names in MODEL_GROUPS.items():
        st.markdown(f"## {group} models")
        print_table_overall(encoder_df, model_names)

    # (CSV path, section heading) for each decoder table, in display order.
    decoder_tables = (
        ("data/250126/decoder_overall_valid.csv", "## Decoder-based models (VALID)"),
        ("data/250126/decoder_overall_invalid.csv", "## Decoder-based models (INVALID)"),
    )
    for csv_path, heading in decoder_tables:
        decoder_df = pd.read_csv(csv_path)
        st.markdown(heading)
        print_table_overall(decoder_df, DECODERS)


if __name__ == "__main__":
    main()
pages/summary_acc_250126.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+
3
+ import pandas as pd
4
+
5
+ import streamlit as st
6
+
7
+ st.set_page_config(layout="wide")
8
# Every entry below is a "<family>:<model>:<tag>" key; rows of the summary
# CSVs are matched against these keys when the tables are built.

# Models shown under the "short_captions" heading.
SHORT_CAPTIONS = [
    "ALIGN:align-base:coyo700m",
    "OpenCLIP:ViT-B-32:openai",
    "OpenCLIP:ViT-B-16:openai",
    "OpenCLIP:ViT-L-14:openai",
    "OpenCLIP:ViT-L-14-336:openai",
    "OpenCLIP:ViT-B-32:laion2b_s34b_b79k",
    "OpenCLIP:ViT-B-16:laion2b_s34b_b88k",
    "OpenCLIP:ViT-L-14:laion2b_s32b_b82k",
    "OpenCLIP:ViT-g-14:laion2b_s34b_b88k",
    "OpenCLIP:ViT-H-14:laion2b_s32b_b79k",
    "OpenCLIP:roberta-ViT-B-32:laion2b_s12b_b32k",
    "OpenCLIP:ViT-B-16-SigLIP:webli",
    "OpenCLIP:ViT-B-16-SigLIP-384:webli",
    "OpenCLIP:ViT-L-16-SigLIP-256:webli",
    "OpenCLIP:ViT-L-16-SigLIP-384:webli",
    "OpenCLIP:ViT-SO400M-14-SigLIP:webli",
    "OpenCLIP:coca_ViT-B-32:laion2b_s13b_b90k",
    "OpenCLIP:coca_ViT-L-14:laion2b_s13b_b90k",
]

# Models shown under the "long_captions" heading.
LONG_CAPTIONS = [
    "DreamLIP:dreamlip-vitb16:cc3m-long",
    "DreamLIP:dreamlip-vitb16:cc12m-long",
    "DreamLIP:dreamlip-vitb16:yfcc15m-long",
    "DreamLIP:dreamlip-vitb16:cc30m-long",
    "FLAIR:flair-vitb16:cc3m-recap",
    "FLAIR:flair-vitb16:cc12m-recap",
    "FLAIR:flair-vitb16:yfcc15m-recap",
    "FLAIR:flair-vitb16:cc30m-recap",
    "CLIPS:CLIPS-Large-14-224:recap-datacomp1b",
    "CLIPS:CLIPS-Large-14-336:recap-datacomp1b",
    "CLIPS:CLIPS-Huge-14-224:recap-datacomp1b",
    "LoTLIP:LoTLIP-ViT-B-32:lotlip100m",
    "LoTLIP:LoTLIP-ViT-B-16:lotlip100m",
    "Recap-CLIP:ViT-L-16-HTxt-Recap-CLIP:recap-datacomp1b",
    "LongCLIP:longclip-vitb32:sharegpt4v-1m",
    "LongCLIP:longclip-vitb16:sharegpt4v-1m",
    "LongCLIP:longclip-vitl14:sharegpt4v-1m",
    "LongCLIP:longclip-vitl14_336px:sharegpt4v-1m",
    "Jina-CLIP:jina-clip-v1:jinaai",
    "Jina-CLIP:jina-clip-v2:jinaai",
]

# Models shown under the "compositionality" heading.
COMPOSITIONALITY = [
    "OpenCLIP:ViT-B-32:openai",
    "StructuredCLIP:NegCLIP-ViT-B-32:coco-ft",
    "StructuredCLIP:CE-CLIP-ViT-B-32:coco-ft",
    "StructuredCLIP:DAC-LLM-ViT-B-32:cc3m-ft",
    "StructuredCLIP:DAC-SAM-ViT-B-32:cc3m-ft",
    "FSC-CLIP:fsc-clip-ViT-B-32:laioncoco-ft",
    "FSC-CLIP:fsc-clip-ViT-B-16:laioncoco-ft",
    "FSC-CLIP:fsc-clip-ViT-L-14:laioncoco-ft",
]

# Decoder-based scorers; deliberately kept out of MODEL_GROUPS because they
# are looked up in their own decoder_* CSV files.
DECODERS = [
    "vqascore:instructblip-flant5-xl:none",
    "vqascore:clip-flant5-xl:none",
    "vqascore:llava-v1.5-7b:none",
    "vqascore:sharegpt4v-7b:none",
    "visualgptscore:instructblip-flant5-xl:none",
    "visualgptscore:clip-flant5-xl:none",
    "visualgptscore:llava-v1.5-7b:none",
    "visualgptscore:sharegpt4v-7b:none",
]

# Section name -> model keys, in the order the sections are rendered.
MODEL_GROUPS = {
    "short_captions": SHORT_CAPTIONS,
    "long_captions": LONG_CAPTIONS,
    "compositionality": COMPOSITIONALITY,
}
50
+
51
+
52
def render_mi_table(df, level0_cols):
    """Build an HTML table with a two-row header from a MultiIndex-column frame.

    Args:
        df: DataFrame whose columns are a MultiIndex; level 0 is the group
            label and level 1 the label shown in the second header row.
        level0_cols: Group labels in display order.  Each becomes one header
            cell spanning all of its sub-columns; a falsy label produces an
            empty cell with ``colspan="1"``.

    Returns:
        A string holding a ``<style>`` block followed by the ``<table>``
        markup.

    Note:
        Labels and cell values are interpolated without HTML escaping.  The
        caller in this file passes the result to ``st.markdown`` with
        ``unsafe_allow_html=True``, so only feed it trusted data.
    """
    # CSS for the table.  The markup is kept flush-left and free of blank
    # lines so a Markdown renderer cannot mistake it for an indented code
    # block or end the HTML block early.
    table_style = "\n".join(
        [
            "<style>",
            "table {",
            "width: 100%;",
            "border-collapse: collapse;",
            "}",
            "th, td {",
            "border: 1px solid black;",
            "text-align: center;",
            "padding: 8px;",
            "}",
            "th {",
            "background-color: #262730;",
            "}",
            "</style>",
        ]
    )

    # Top header row (level 0): one cell per group, spanning its sub-columns.
    header_cells = []
    for col in level0_cols:
        colspan = len(df.xs(col, axis=1, level=0).columns) if col else 1
        header_cells.append(
            f'<th colspan="{colspan}" style="text-align: center;">{col if col else ""}</th>'
        )
    header_html = "<tr>" + "".join(header_cells) + "</tr>"

    # Second header row (level 1): one cell per column.
    sub_header_cells = [
        f"<th style='text-align: center;'>{col[1] if len(col) > 1 else col[0]}</th>"
        for col in df.columns
    ]
    sub_header_html = "<tr>" + "".join(sub_header_cells) + "</tr>"

    def format_cell(value):
        # Anything float() accepts (numbers and numeric-looking strings) is
        # shown with one decimal; every other value is passed through as-is.
        try:
            return f"{float(value):.1f}"
        except (TypeError, ValueError, OverflowError):
            return value

    # Data rows.
    rows_html = "".join(
        "<tr>" + "".join(f"<td>{format_cell(value)}</td>" for value in row) + "</tr>"
        for _, row in df.iterrows()
    )

    # Assemble the final HTML.
    return "\n".join(
        [table_style, "<table>", header_html, sub_header_html, rows_html, "</table>"]
    )
107
+
108
+
109
+ def format_df(df):
110
+ cols = []
111
+ for col in df.columns:
112
+ if col in [("Model", "family"), ("Model", "model"), ("Model", "tag")]:
113
+ continue
114
+ cols.append(col)
115
+ formatted_df = df.style.format({col: "{:.1f}" for col in cols})
116
+ return formatted_df
117
+
118
+
119
def print_table(df):
    """Render ``df`` on the Streamlit page as an HTML table with grouped headers.

    Args:
        df: DataFrame whose column labels are tuples; element 0 of each label
            is used as the group heading.
    """
    # dict.fromkeys drops repeated group labels while keeping first-seen order.
    top_level_labels = list(dict.fromkeys(column[0] for column in df.columns))
    table_markup = render_mi_table(df, top_level_labels)
    st.markdown(table_markup, unsafe_allow_html=True)
125
+
126
+
127
def print_table_summary(df, model_names):
    """Show the rows of ``df`` listed in ``model_names`` as a grouped-header table.

    Each row is identified by the key ``"<family>:<model>:<tag>"`` built from
    the three ``("Model", ...)`` columns.  Rows are emitted in the order of
    ``model_names``; names with no matching row are skipped silently.

    Args:
        df: DataFrame whose column labels are ``(group, name)`` tuples and
            which contains the three ``("Model", ...)`` identifier columns.
        model_names: Iterable of ``"family:model:tag"`` keys to display.
    """
    id_columns = [("Model", "family"), ("Model", "model"), ("Model", "tag")]
    # Cast to str first: ":".join() raises TypeError on NaN or numeric cells,
    # which read_csv produces for empty fields or all-digit tags.
    family, model, tag = (df[col].astype(str) for col in id_columns)
    keys = family + ":" + model + ":" + tag
    selected = [df[keys == name] for name in model_names]
    # pd.concat raises ValueError on an empty list, so fall back to an empty
    # copy that still carries the column headers.
    table = pd.concat(selected, axis=0) if selected else df.iloc[:0].copy()
    # Make sure the columns are a real MultiIndex before rendering.
    table.columns = pd.MultiIndex.from_tuples(table.columns)
    print_table(table)
136
+
137
+
138
def _read_summary_csv(path):
    """Load a summary CSV and turn its stringified tuple headers back into tuples."""
    df = pd.read_csv(path)
    # Headers are stored as text such as "('Model', 'family')";
    # ast.literal_eval parses Python literals only and never executes code.
    df.columns = [ast.literal_eval(col) for col in df.columns]
    return df


# Streamlit app
def main():
    """Render the page: one table per MODEL_GROUPS entry, then the decoder tables."""
    st.title("Interface")

    df = _read_summary_csv("data/250126/summary.csv")
    for group, model_names in MODEL_GROUPS.items():
        st.markdown(f"## {group} models")
        print_table_summary(df, model_names)

    df = _read_summary_csv("data/250126/decoder_summary_valid.csv")
    st.markdown("## Decoder-based models (VALID SAMPLES)")
    print_table_summary(df, DECODERS)

    df = _read_summary_csv("data/250126/decoder_summary_invalid.csv")
    st.markdown("## Decoder-based models (INVALID SAMPLES)")
    print_table_summary(df, DECODERS)


if __name__ == "__main__":
    main()