/*
 * question-explorer-api/services/claudeQuestionGenerator.js
 * Eric Gardner
 * Use Claude API for question generation
 * ce30646
 */
import Anthropic from '@anthropic-ai/sdk';
// Module-level cache so the Anthropic client is constructed at most once
let client = null;

/**
 * Return the shared Anthropic client, creating it on first use.
 *
 * The API key is read from the ANTHROPIC_API_KEY environment variable the
 * first time this is called; later calls reuse the cached instance.
 *
 * @returns {Anthropic} - The cached client instance
 * @throws {Error} If ANTHROPIC_API_KEY is unset or empty when the client is first created
 */
function getClient() {
	// Fast path: already initialised
	if ( client ) {
		return client;
	}

	const key = process.env.ANTHROPIC_API_KEY;
	if ( !key ) {
		throw new Error( 'ANTHROPIC_API_KEY environment variable is required for Claude question generation' );
	}

	client = new Anthropic( { apiKey: key } );
	return client;
}
/**
 * Turn Claude's raw reply into a clean list of questions.
 *
 * Each line is trimmed, then stripped of a leading list marker ("- ", "* ",
 * "1. ", "2) ") and of wrapping quotes. The prompt's own examples are
 * formatted as quoted bullet items, so the model sometimes mirrors that
 * format; without this normalisation such lines would fail the
 * "ends with ?" check and be silently dropped.
 *
 * @param {string} text - Raw text returned by the model
 * @returns {string[]} - Lines longer than 10 characters that end in "?"
 */
function extractQuestions( text ) {
	return text
		.split( '\n' )
		.map( ( line ) => line
			.trim()
			.replace( /^(?:[-*\u2022]|\d+[.)])\s+/, '' )
			.replace( /^["'\u201C\u2018]+|["'\u201D\u2019]+$/g, '' )
			.trim()
		)
		.filter( ( line ) => line.length > 10 && line.endsWith( '?' ) );
}

/**
 * Generate questions using Claude based on the full article text.
 *
 * The article is rebuilt from its chunks (or truncated when its estimated
 * size exceeds the token budget), embedded in a prompt that asks for short,
 * curiosity-driven questions grounded only in the article text, and sent to
 * the Anthropic Messages API. The reply is parsed line by line.
 *
 * @param {Array} chunks - Article chunks with text and section info
 * @param {string} articleTitle - The title of the article
 * @param {number} numQuestions - Number of questions to generate (default: 5)
 * @returns {Promise<string[]>} - Array of at most numQuestions generated questions
 * @throws {Error} If the client cannot be created, the API request fails,
 *   or the response contains no text content
 */
export async function generateQuestionsWithClaude( chunks, articleTitle, numQuestions = 5 ) {
	const anthropic = getClient();

	// Conservative budget (in estimated tokens) for the article part of the prompt
	const maxArticleTokens = 50000;

	// Build a structured representation of the article
	const articleContent = buildArticleContent( chunks );

	// Rough estimate: ~4 characters per token
	const estimatedTokens = Math.ceil( articleContent.length / 4 );
	console.log( `Article content: ~${ estimatedTokens } tokens estimated` );

	// If the article is very long, cut it down to the budget
	const contentToUse = estimatedTokens > maxArticleTokens
		? truncateArticleContent( chunks, maxArticleTokens )
		: articleContent;

	const prompt = `You are helping create an interactive Wikipedia reading experience. Given the following Wikipedia article about "${articleTitle}", generate ${numQuestions} short, simple questions that invite readers to explore the article.
**CRITICAL: Base questions ONLY on the provided article text.**
You must generate questions answerable using ONLY information in the article below. Do not use external knowledge. If you know facts about "${articleTitle}" not mentioned in this text, do NOT ask about them.
**Question style:**
- **Keep it short** - Questions should be 5-10 words. Simple, open-ended phrasing.
- **Use plain language** - Write for casual readers, not academics.
- **Be inviting, not testing** - Questions should spark curiosity, not feel like a quiz.
Good examples:
- "Why did Plato write about this?"
- "What happened to the search expeditions?"
- "How did this influence later writers?"
Avoid:
- Long, complex questions with multiple clauses
- Academic or formal phrasing
- Questions answered in the opening paragraph
**Content guidelines:**
- Look for interesting details deeper in the article, not just the lead
- Reference specific things mentioned in the text
- Vary the topics covered across your questions
<article>
${contentToUse}
</article>
Generate exactly ${numQuestions} questions, one per line. Output only the questions, no numbering. Keep each question short and simple.`;

	try {
		const response = await anthropic.messages.create( {
			model: 'claude-sonnet-4-5',
			max_tokens: 1024,
			messages: [
				{
					role: 'user',
					content: prompt
				}
			]
		} );

		// The response content is a list of typed blocks; collect every text
		// block instead of assuming the first block exists and is text.
		const blocks = Array.isArray( response.content ) ? response.content : [];
		const textBlocks = blocks.filter(
			( block ) => block && block.type === 'text' && typeof block.text === 'string'
		);
		if ( textBlocks.length === 0 ) {
			throw new Error( 'Claude response contained no text content' );
		}
		const text = textBlocks.map( ( block ) => block.text ).join( '\n' );

		const questions = extractQuestions( text );
		console.log( `Claude generated ${ questions.length } questions` );
		return questions.slice( 0, numQuestions );
	} catch ( error ) {
		// Log for operators, then let the caller decide how to recover
		console.error( 'Claude question generation failed:', error.message );
		throw error;
	}
}
/**
 * Render article chunks as sectioned plain text.
 *
 * Chunk texts are bucketed by section title (chunks without a title go
 * under "Introduction"), keeping sections in first-seen order. Each section
 * is emitted as a "## <title>" heading followed by its texts separated by
 * blank lines.
 *
 * @param {Array} chunks - Article chunks
 * @returns {string} - Formatted article content
 */
function buildArticleContent( chunks ) {
	// A Map preserves insertion order, so sections appear as first encountered
	const textsBySection = chunks.reduce( ( grouped, piece ) => {
		const heading = piece.sectionTitle || 'Introduction';
		const bucket = grouped.get( heading );
		if ( bucket === undefined ) {
			grouped.set( heading, [ piece.text ] );
		} else {
			bucket.push( piece.text );
		}
		return grouped;
	}, new Map() );

	// One rendered string per section; sections are separated by a newline
	return Array.from( textsBySection )
		.map( ( [ heading, texts ] ) => `## ${ heading }\n\n${ texts.join( '\n\n' ) }\n` )
		.join( '\n' );
}
/**
 * Truncate article content to fit within an estimated token budget.
 *
 * Chunks are grouped by section (chunks without a sectionTitle go under
 * "Introduction") in first-seen order. Whole sections are emitted while they
 * fit within maxTokens. The first section that does not fit is cut to the
 * remaining budget and suffixed with "...", provided more than 500 tokens
 * remain after its header; otherwise it is omitted. In both cases no later
 * section is included.
 *
 * Tokens are estimated at 4 characters each. Separators between parts and
 * the "..." suffix are not counted against the budget.
 *
 * @param {Array} chunks - Article chunks
 * @param {number} maxTokens - Maximum estimated tokens
 * @returns {string} - Truncated content
 */
function truncateArticleContent( chunks, maxTokens ) {
	const charsPerToken = 4;
	// A partial section is only worth including if more than this many tokens remain
	const minPartialTokens = 500;

	// Group chunks by section
	const sections = new Map();
	for ( const chunk of chunks ) {
		const sectionTitle = chunk.sectionTitle || 'Introduction';
		if ( !sections.has( sectionTitle ) ) {
			sections.set( sectionTitle, [] );
		}
		sections.get( sectionTitle ).push( chunk.text );
	}

	const parts = [];
	let estimatedTokens = 0;
	for ( const [ sectionTitle, texts ] of sections ) {
		const header = `## ${ sectionTitle }\n`;
		const sectionContent = texts.join( '\n\n' );
		const headerTokens = Math.ceil( header.length / charsPerToken );
		const contentTokens = Math.ceil( sectionContent.length / charsPerToken );

		// "<=": a section that exactly fills the budget still fits. A strict
		// "<" would send it to the truncation path, appending a misleading
		// "..." to untruncated text and dropping every later section.
		if ( estimatedTokens + headerTokens + contentTokens <= maxTokens ) {
			parts.push( header, sectionContent, '' );
			estimatedTokens += headerTokens + contentTokens;
			continue;
		}

		if ( estimatedTokens + headerTokens + minPartialTokens < maxTokens ) {
			// Include header and truncated content
			const availableChars = ( maxTokens - estimatedTokens - headerTokens ) * charsPerToken;
			let excerpt = sectionContent.slice( 0, availableChars );
			// slice() counts UTF-16 code units; don't leave half of a surrogate pair
			const lastUnit = excerpt.charCodeAt( excerpt.length - 1 );
			if ( lastUnit >= 0xD800 && lastUnit <= 0xDBFF ) {
				excerpt = excerpt.slice( 0, -1 );
			}
			parts.push( header, `${ excerpt }...`, '' );
		}

		// Budget exhausted: stop at the first section that does not fit whole
		break;
	}
	return parts.join( '\n' );
}
/**
 * Report whether Claude question generation can be used.
 *
 * Only checks that the ANTHROPIC_API_KEY environment variable holds a
 * non-empty value; the key itself is not validated.
 *
 * @returns {boolean} - True if ANTHROPIC_API_KEY is set
 */
export function isClaudeAvailable() {
	const { ANTHROPIC_API_KEY: apiKey } = process.env;
	return apiKey ? true : false;
}