|
{ |
|
"time": "241031154353", |
|
"results": { |
|
"Claude3-Sonnet": { |
|
"META": { |
|
"Method": [ |
|
"Claude3-Sonnet", |
|
"https://aws.amazon.com/bedrock/claude/" |
|
], |
|
"Parameters": "", |
|
"Org": "Anthropic", |
|
"OpenSource": "No", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 80.75 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 71.63 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 70.17 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 67.76 |
|
} |
|
}, |
|
"Claude2": { |
|
"META": { |
|
"Method": [ |
|
"Claude2", |
|
"https://aws.amazon.com/bedrock/claude/" |
|
], |
|
"Parameters": "", |
|
"Org": "Anthropic", |
|
"OpenSource": "No", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 75.46 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 65.5 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 63.53 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 65.24 |
|
} |
|
}, |
|
"ChatGPT": { |
|
"META": { |
|
"Method": [ |
|
"ChatGPT", |
|
"https://platform.openai.com/docs/models#gpt-3-5-turbo" |
|
], |
|
"Parameters": "", |
|
"Org": "OpenAI", |
|
"OpenSource": "No", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 75.63 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 64.97 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 59.79 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 60.81 |
|
} |
|
}, |
|
"LLaMA3-70B-Instruct": { |
|
"META": { |
|
"Method": [ |
|
"LLaMA3-70B-Instruct", |
|
"https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct" |
|
], |
|
"Parameters": "70B", |
|
"Org": "Meta", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 75.24 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 69.29 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 67.67 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 62.0 |
|
} |
|
}, |
|
"QWen1.5-72B": { |
|
"META": { |
|
"Method": [ |
|
"QWen1.5-72B", |
|
"https://huggingface.co/Qwen/Qwen1.5-72B" |
|
], |
|
"Parameters": "72B", |
|
"Org": "Alibaba", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 71.67 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 68.92 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 64.12 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 64.84 |
|
} |
|
}, |
|
"LLaMA3-70B": { |
|
"META": { |
|
"Method": [ |
|
"LLaMA3-70B", |
|
"https://huggingface.co/meta-llama/Meta-Llama-3-70B" |
|
], |
|
"Parameters": "70B", |
|
"Org": "Meta", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 69.59 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 63.56 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 55.77 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 58.95 |
|
} |
|
}, |
|
"LLaMA2-70B-Chat": { |
|
"META": { |
|
"Method": [ |
|
"LLaMA2-70B-Chat", |
|
"https://huggingface.co/meta-llama/Llama-2-70b-chat-hf" |
|
], |
|
"Parameters": "70B", |
|
"Org": "Meta", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 61.84 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 40.73 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 44.2 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 47.04 |
|
} |
|
}, |
|
"LLaMA2-70B": { |
|
"META": { |
|
"Method": [ |
|
"LLaMA2-70B", |
|
"https://huggingface.co/meta-llama/Llama-2-70b-hf" |
|
], |
|
"Parameters": "70B", |
|
"Org": "Meta", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 61.05 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 55.87 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 43.24 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 47.85 |
|
} |
|
}, |
|
"Mixtral-8x7B": { |
|
"META": { |
|
"Method": [ |
|
"Mixtral-8x7B", |
|
"https://huggingface.co/mistralai/Mixtral-8x7B-v0.1" |
|
], |
|
"Parameters": "46.7B", |
|
"Org": "MistralAI", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 59.43 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 54.32 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 55.31 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 44.69 |
|
} |
|
}, |
|
"QWen1.5-14B": { |
|
"META": { |
|
"Method": [ |
|
"QWen1.5-14B", |
|
"https://huggingface.co/Qwen/Qwen1.5-14B" |
|
], |
|
"Parameters": "14B", |
|
"Org": "Alibaba", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 67.22 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 60.92 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 54.92 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 55.21 |
|
} |
|
}, |
|
"eCeLLM-L": { |
|
"META": { |
|
"Method": [ |
|
"eCeLLM-L", |
|
"https://huggingface.co/NingLab/eCeLLM-L" |
|
], |
|
"Parameters": "13B", |
|
"Org": "OSU NingLab", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 61.54 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 54.84 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 54.55 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 59.64 |
|
} |
|
}, |
|
"Vicuna-13B-v1.5": { |
|
"META": { |
|
"Method": [ |
|
"Vicuna-13B-v1.5", |
|
"https://huggingface.co/lmsys/vicuna-13b-v1.5" |
|
], |
|
"Parameters": "13B", |
|
"Org": "LMSys", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 59.64 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 52.63 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 49.81 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 49.64 |
|
} |
|
}, |
|
"LLaMA2-13B-Chat": { |
|
"META": { |
|
"Method": [ |
|
"LLaMA2-13B-Chat", |
|
"https://huggingface.co/meta-llama/Llama-2-13b-chat-hf" |
|
], |
|
"Parameters": "13B", |
|
"Org": "Meta", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 51.79 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 45.01 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 39.95 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 42.99 |
|
} |
|
}, |
|
"LLaMA2-13B": { |
|
"META": { |
|
"Method": [ |
|
"LLaMA2-13B", |
|
"https://huggingface.co/meta-llama/Llama-2-13b-hf" |
|
], |
|
"Parameters": "13B", |
|
"Org": "Meta", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 45.86 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 39.47 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 39.43 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 44.23 |
|
} |
|
}, |
|
"LLaMA3-8B-Instruct": { |
|
"META": { |
|
"Method": [ |
|
"LLaMA3-8B-Instruct", |
|
"https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct" |
|
], |
|
"Parameters": "8B", |
|
"Org": "Meta", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 65.26 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 56.84 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 54.88 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 55.37 |
|
} |
|
}, |
|
"LLaMA3-8B": { |
|
"META": { |
|
"Method": [ |
|
"LLaMA3-8B", |
|
"https://huggingface.co/meta-llama/Meta-Llama-3-8B" |
|
], |
|
"Parameters": "8B", |
|
"Org": "Meta", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 58.02 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 49.74 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 44.16 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 51.03 |
|
} |
|
}, |
|
"QWen1.5-7B": { |
|
"META": { |
|
"Method": [ |
|
"QWen1.5-7B", |
|
"https://huggingface.co/Qwen/Qwen1.5-7B" |
|
], |
|
"Parameters": "7B", |
|
"Org": "Alibaba", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 58.89 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 52.34 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 49.81 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 50.14 |
|
} |
|
}, |
|
"eCeLLM-M": { |
|
"META": { |
|
"Method": [ |
|
"eCeLLM-M", |
|
"https://huggingface.co/NingLab/eCeLLM-M" |
|
], |
|
"Parameters": "7B", |
|
"Org": "OSU NingLab", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 63.29 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 48.94 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 53.78 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 56.08 |
|
} |
|
}, |
|
"Zephyr-Beta": { |
|
"META": { |
|
"Method": [ |
|
"Zephyr-Beta", |
|
"https://huggingface.co/HuggingFaceH4/zephyr-7b-beta" |
|
], |
|
"Parameters": "7B", |
|
"Org": "HuggingFace H4", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 61.65 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 52.57 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 44.73 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 45.35 |
|
} |
|
}, |
|
"Mistral-7B-Instruct": { |
|
"META": { |
|
"Method": [ |
|
"Mistral-7B-Instruct", |
|
"https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2" |
|
], |
|
"Parameters": "7B", |
|
"Org": "MistralAI", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 62.03 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 46.36 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 42.21 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 43.32 |
|
} |
|
}, |
|
"Mistral-7B": { |
|
"META": { |
|
"Method": [ |
|
"Mistral-7B", |
|
"https://huggingface.co/mistralai/Mistral-7B-v0.1" |
|
], |
|
"Parameters": "7B", |
|
"Org": "MistralAI", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 55.82 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 46.69 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 46.27 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 41.47 |
|
} |
|
}, |
|
"Vicuna-7B-v1.5": { |
|
"META": { |
|
"Method": [ |
|
"Vicuna-7B-v1.5", |
|
"https://huggingface.co/lmsys/vicuna-7b-v1.5" |
|
], |
|
"Parameters": "7B", |
|
"Org": "LMSys", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 53.46 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 45.06 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 41.11 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 43.82 |
|
} |
|
}, |
|
"LLaMA2-7B-Chat": { |
|
"META": { |
|
"Method": [ |
|
"LLaMA2-7B-Chat", |
|
"https://huggingface.co/meta-llama/Llama-2-7b-chat-hf" |
|
], |
|
"Parameters": "7B", |
|
"Org": "Meta", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 51.67 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 43.48 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 41.42 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 40.43 |
|
} |
|
}, |
|
"LLaMA2-7B": { |
|
"META": { |
|
"Method": [ |
|
"LLaMA2-7B", |
|
"https://huggingface.co/meta-llama/Llama-2-7b-hf" |
|
], |
|
"Parameters": "7B", |
|
"Org": "Meta", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 38.22 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 32.81 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 32.56 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 27.71 |
|
} |
|
}, |
|
"QWen1.5-4B": { |
|
"META": { |
|
"Method": [ |
|
"QWen1.5-4B", |
|
"https://huggingface.co/Qwen/Qwen1.5-4B" |
|
], |
|
"Parameters": "4B", |
|
"Org": "Alibaba", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 57.21 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 52.56 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 42.74 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 49.78 |
|
} |
|
}, |
|
"Phi-2": { |
|
"META": { |
|
"Method": [ |
|
"Phi-2", |
|
"https://huggingface.co/microsoft/phi-2" |
|
], |
|
"Parameters": "2.8B", |
|
"Org": "Microsoft", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 49.34 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 42.83 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 36.38 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 32.91 |
|
} |
|
}, |
|
"eCeLLM-S": { |
|
"META": { |
|
"Method": [ |
|
"eCeLLM-S", |
|
"https://huggingface.co/NingLab/eCeLLM-S" |
|
], |
|
"Parameters": "2.8B", |
|
"Org": "OSU NingLab", |
|
"OpenSource": "Yes", |
|
"Verified": "Yes" |
|
}, |
|
"Shopping Concept Understanding": { |
|
"Overall": 49.4 |
|
}, |
|
"Shopping Knowledge Reasoning": { |
|
"Overall": 39.06 |
|
}, |
|
"User Behavior Alignment": { |
|
"Overall": 36.33 |
|
}, |
|
"Multi-lingual Abilities": { |
|
"Overall": 32.79 |
|
} |
|
} |
|
} |
|
} |