[
  {
    "question": "Which of the following best describes a Large Language Model (LLM)?",
    "answer_a": "A model specializing in language recognition",
    "answer_b": "A massive neural network that understands and generates human language",
    "answer_c": "A model exclusively used for language data tasks like summarization or classification",
    "answer_d": "A rule-based chatbot used for conversations",
    "correct_answer": "B"
  },
  {
    "question": "LLMs are typically:",
    "answer_a": "Pre-trained on small, curated datasets",
    "answer_b": "Trained on large text corpora to capture linguistic patterns",
    "answer_c": "Trained purely on translation tasks",
    "answer_d": "Designed to function solely with GPU resources",
    "correct_answer": "B"
  },
  {
    "question": "Which of the following is a common architecture for LLMs?",
    "answer_a": "Convolutional Neural Networks (CNNs)",
    "answer_b": "Transformer",
    "answer_c": "Recurrent Neural Networks (RNNs) with LSTM",
    "answer_d": "Support Vector Machines",
    "correct_answer": "B"
  },
  {
    "question": "What does it mean when we say LLMs are \"autoregressive\"?",
    "answer_a": "They regress to the mean to reduce variance",
    "answer_b": "They generate text by predicting the next token based on previous tokens",
    "answer_c": "They can only handle labeled data",
    "answer_d": "They can output text only after the entire input is known at once",
    "correct_answer": "B"
  },
  {
    "question": "Which of these is NOT a common use of LLMs?",
    "answer_a": "Summarizing content",
    "answer_b": "Generating code",
    "answer_c": "Playing strategy games like chess or Go",
    "answer_d": "Conversational AI",
    "correct_answer": "C"
  },
  {
    "question": "Which of the following best describes a \"special token\"?",
    "answer_a": "A token that makes the model forget all context",
    "answer_b": "A model signature required for API calls",
    "answer_c": "A token that helps segment or structure the conversation in the model",
    "answer_d": "A token that always represents the end of text",
    "correct_answer": "C"
  },
  {
    "question": "What is the primary goal of a \"chat template\"?",
    "answer_a": "To force the model into a single-turn conversation",
    "answer_b": "To structure interactions and define roles in a conversation",
    "answer_c": "To replace the need for system messages",
    "answer_d": "To store prompts into the model's weights permanently",
    "correct_answer": "B"
  },
  {
    "question": "How do tokenizers handle text for modern NLP models?",
    "answer_a": "By splitting text into individual words only",
    "answer_b": "By breaking words into subword units and assigning numerical IDs",
    "answer_c": "By storing text directly without transformation",
    "answer_d": "By removing all punctuation automatically",
    "correct_answer": "B"
  },
  {
    "question": "Which role in a conversation sets the overall behavior for a model?",
    "answer_a": "user",
    "answer_b": "system",
    "answer_c": "assistant",
    "answer_d": "developer",
    "correct_answer": "B"
  },
  {
    "question": "Which statement is TRUE about tool usage in chat templates?",
    "answer_a": "Tools cannot be used within the conversation context.",
    "answer_b": "Tools are used only for logging messages.",
    "answer_c": "Tools allow the assistant to offload tasks like web search or calculations.",
    "answer_d": "Tools are unsupported in all modern LLMs.",
    "correct_answer": "C"
  }
]