{ "time": "241031154353", "results": { "Claude3-Sonnet": { "META": { "Method": [ "Claude3-Sonnet", "https://aws.amazon.com/bedrock/claude/" ], "Parameters": "", "Org": "Anthropic", "OpenSource": "No", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 80.75 }, "Shopping Knowledge Reasoning": { "Overall": 71.63 }, "User Behavior Alignment": { "Overall": 70.17 }, "Multi-lingual Abilities": { "Overall": 67.76 } }, "Claude2": { "META": { "Method": [ "Claude2", "https://aws.amazon.com/bedrock/claude/" ], "Parameters": "", "Org": "Anthropic", "OpenSource": "No", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 75.46 }, "Shopping Knowledge Reasoning": { "Overall": 65.5 }, "User Behavior Alignment": { "Overall": 63.53 }, "Multi-lingual Abilities": { "Overall": 65.24 } }, "ChatGPT": { "META": { "Method": [ "ChatGPT", "https://platform.openai.com/docs/models#gpt-3-5-turbo" ], "Parameters": "", "Org": "OpenAI", "OpenSource": "No", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 75.63 }, "Shopping Knowledge Reasoning": { "Overall": 64.97 }, "User Behavior Alignment": { "Overall": 59.79 }, "Multi-lingual Abilities": { "Overall": 60.81 } }, "LLaMA3-70B-Instruct": { "META": { "Method": [ "LLaMA3-70B-Instruct", "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct" ], "Parameters": "70B", "Org": "Meta", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 75.24 }, "Shopping Knowledge Reasoning": { "Overall": 69.29 }, "User Behavior Alignment": { "Overall": 67.67 }, "Multi-lingual Abilities": { "Overall": 62.0 } }, "QWen1.5-72B": { "META": { "Method": [ "QWen1.5-72B", "https://huggingface.co/Qwen/Qwen1.5-72B" ], "Parameters": "72B", "Org": "Alibaba", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 71.67 }, "Shopping Knowledge Reasoning": { "Overall": 68.92 }, "User Behavior Alignment": { "Overall": 64.12 }, "Multi-lingual Abilities": { 
"Overall": 64.84 } }, "LLaMA3-70B": { "META": { "Method": [ "LLaMA3-70B", "https://huggingface.co/meta-llama/Meta-Llama-3-70B" ], "Parameters": "70B", "Org": "Meta", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 69.59 }, "Shopping Knowledge Reasoning": { "Overall": 63.56 }, "User Behavior Alignment": { "Overall": 55.77 }, "Multi-lingual Abilities": { "Overall": 58.95 } }, "LLaMA2-70B-Chat": { "META": { "Method": [ "LLaMA2-70B-Chat", "https://huggingface.co/meta-llama/Llama-2-70b-chat-hf" ], "Parameters": "70B", "Org": "Meta", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 61.84 }, "Shopping Knowledge Reasoning": { "Overall": 40.73 }, "User Behavior Alignment": { "Overall": 44.2 }, "Multi-lingual Abilities": { "Overall": 47.04 } }, "LLaMA2-70B": { "META": { "Method": [ "LLaMA2-70B", "https://huggingface.co/meta-llama/Llama-2-70b-hf" ], "Parameters": "70B", "Org": "Meta", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 61.05 }, "Shopping Knowledge Reasoning": { "Overall": 55.87 }, "User Behavior Alignment": { "Overall": 43.24 }, "Multi-lingual Abilities": { "Overall": 47.85 } }, "Mixtral-8x7B": { "META": { "Method": [ "Mixtral-8x7B", "https://huggingface.co/mistralai/Mixtral-8x7B-v0.1" ], "Parameters": "46.7B", "Org": "MistralAI", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 59.43 }, "Shopping Knowledge Reasoning": { "Overall": 54.32 }, "User Behavior Alignment": { "Overall": 55.31 }, "Multi-lingual Abilities": { "Overall": 44.69 } }, "QWen1.5-14B": { "META": { "Method": [ "QWen1.5-14B", "https://huggingface.co/Qwen/Qwen1.5-14B" ], "Parameters": "14B", "Org": "Alibaba", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 67.22 }, "Shopping Knowledge Reasoning": { "Overall": 60.92 }, "User Behavior Alignment": { "Overall": 54.92 }, "Multi-lingual Abilities": { 
"Overall": 55.21 } }, "eCeLLM-L": { "META": { "Method": [ "eCeLLM-L", "https://huggingface.co/NingLab/eCeLLM-L" ], "Parameters": "13B", "Org": "OSU NingLab", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 61.54 }, "Shopping Knowledge Reasoning": { "Overall": 54.84 }, "User Behavior Alignment": { "Overall": 54.55 }, "Multi-lingual Abilities": { "Overall": 59.64 } }, "Vicuna-13B-v1.5": { "META": { "Method": [ "Vicuna-13B-v1.5", "https://huggingface.co/lmsys/vicuna-13b-v1.5" ], "Parameters": "13B", "Org": "LMSys", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 59.64 }, "Shopping Knowledge Reasoning": { "Overall": 52.63 }, "User Behavior Alignment": { "Overall": 49.81 }, "Multi-lingual Abilities": { "Overall": 49.64 } }, "LLaMA2-13B-Chat": { "META": { "Method": [ "LLaMA2-13B-Chat", "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf" ], "Parameters": "13B", "Org": "Meta", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 51.79 }, "Shopping Knowledge Reasoning": { "Overall": 45.01 }, "User Behavior Alignment": { "Overall": 39.95 }, "Multi-lingual Abilities": { "Overall": 42.99 } }, "LLaMA2-13B": { "META": { "Method": [ "LLaMA2-13B", "https://huggingface.co/meta-llama/Llama-2-13b-hf" ], "Parameters": "13B", "Org": "Meta", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 45.86 }, "Shopping Knowledge Reasoning": { "Overall": 39.47 }, "User Behavior Alignment": { "Overall": 39.43 }, "Multi-lingual Abilities": { "Overall": 44.23 } }, "LLaMA3-8B-Instruct": { "META": { "Method": [ "LLaMA3-8B-Instruct", "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct" ], "Parameters": "8B", "Org": "Meta", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 65.26 }, "Shopping Knowledge Reasoning": { "Overall": 56.84 }, "User Behavior Alignment": { "Overall": 54.88 },
"Multi-lingual Abilities": { "Overall": 55.37 } }, "LLaMA3-8B": { "META": { "Method": [ "LLaMA3-8B", "https://huggingface.co/meta-llama/Meta-Llama-3-8B" ], "Parameters": "8B", "Org": "Meta", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 58.02 }, "Shopping Knowledge Reasoning": { "Overall": 49.74 }, "User Behavior Alignment": { "Overall": 44.16 }, "Multi-lingual Abilities": { "Overall": 51.03 } }, "QWen1.5-7B": { "META": { "Method": [ "QWen1.5-7B", "https://huggingface.co/Qwen/Qwen1.5-7B" ], "Parameters": "7B", "Org": "Alibaba", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 58.89 }, "Shopping Knowledge Reasoning": { "Overall": 52.34 }, "User Behavior Alignment": { "Overall": 49.81 }, "Multi-lingual Abilities": { "Overall": 50.14 } }, "eCeLLM-M": { "META": { "Method": [ "eCeLLM-M", "https://huggingface.co/NingLab/eCeLLM-M" ], "Parameters": "7B", "Org": "OSU NingLab", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 63.29 }, "Shopping Knowledge Reasoning": { "Overall": 48.94 }, "User Behavior Alignment": { "Overall": 53.78 }, "Multi-lingual Abilities": { "Overall": 56.08 } }, "Zephyr-Beta": { "META": { "Method": [ "Zephyr-Beta", "https://huggingface.co/HuggingFaceH4/zephyr-7b-beta" ], "Parameters": "7B", "Org": "HuggingFace H4", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 61.65 }, "Shopping Knowledge Reasoning": { "Overall": 52.57 }, "User Behavior Alignment": { "Overall": 44.73 }, "Multi-lingual Abilities": { "Overall": 45.35 } }, "Mistral-7B-Instruct": { "META": { "Method": [ "Mistral-7B-Instruct", "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2" ], "Parameters": "7B", "Org": "MistralAI", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 62.03 }, "Shopping Knowledge Reasoning": { "Overall": 46.36 }, "User Behavior Alignment": { "Overall": 42.21 },
"Multi-lingual Abilities": { "Overall": 43.32 } }, "Mistral-7B": { "META": { "Method": [ "Mistral-7B", "https://huggingface.co/mistralai/Mistral-7B-v0.1" ], "Parameters": "7B", "Org": "MistralAI", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 55.82 }, "Shopping Knowledge Reasoning": { "Overall": 46.69 }, "User Behavior Alignment": { "Overall": 46.27 }, "Multi-lingual Abilities": { "Overall": 41.47 } }, "Vicuna-7B-v1.5": { "META": { "Method": [ "Vicuna-7B-v1.5", "https://huggingface.co/lmsys/vicuna-7b-v1.5" ], "Parameters": "7B", "Org": "LMSys", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 53.46 }, "Shopping Knowledge Reasoning": { "Overall": 45.06 }, "User Behavior Alignment": { "Overall": 41.11 }, "Multi-lingual Abilities": { "Overall": 43.82 } }, "LLaMA2-7B-Chat": { "META": { "Method": [ "LLaMA2-7B-Chat", "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf" ], "Parameters": "7B", "Org": "Meta", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 51.67 }, "Shopping Knowledge Reasoning": { "Overall": 43.48 }, "User Behavior Alignment": { "Overall": 41.42 }, "Multi-lingual Abilities": { "Overall": 40.43 } }, "LLaMA2-7B": { "META": { "Method": [ "LLaMA2-7B", "https://huggingface.co/meta-llama/Llama-2-7b-hf" ], "Parameters": "7B", "Org": "Meta", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 38.22 }, "Shopping Knowledge Reasoning": { "Overall": 32.81 }, "User Behavior Alignment": { "Overall": 32.56 }, "Multi-lingual Abilities": { "Overall": 27.71 } }, "QWen1.5-4B": { "META": { "Method": [ "QWen1.5-4B", "https://huggingface.co/Qwen/Qwen1.5-4B" ], "Parameters": "4B", "Org": "Alibaba", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 57.21 }, "Shopping Knowledge Reasoning": { "Overall": 52.56 }, "User Behavior Alignment": { "Overall": 42.74 }, "Multi-lingual Abilities": { 
"Overall": 49.78 } }, "Phi-2": { "META": { "Method": [ "Phi-2", "https://huggingface.co/microsoft/phi-2" ], "Parameters": "2.8B", "Org": "Microsoft", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 49.34 }, "Shopping Knowledge Reasoning": { "Overall": 42.83 }, "User Behavior Alignment": { "Overall": 36.38 }, "Multi-lingual Abilities": { "Overall": 32.91 } }, "eCeLLM-S": { "META": { "Method": [ "eCeLLM-S", "https://huggingface.co/NingLab/eCeLLM-S" ], "Parameters": "2.8B", "Org": "OSU NingLab", "OpenSource": "Yes", "Verified": "Yes" }, "Shopping Concept Understanding": { "Overall": 49.4 }, "Shopping Knowledge Reasoning": { "Overall": 39.06 }, "User Behavior Alignment": { "Overall": 36.33 }, "Multi-lingual Abilities": { "Overall": 32.79 } } } }