Spaces:

egardner
/

question-explorer-api

Sleeping

App Files Files Community

question-explorer-api / services /claudeQuestionGenerator.js

Eric Gardner

Use Claude API for question generation

ce30646 about 1 month ago

history blame contribute delete

6.02 kB

	import Anthropic from '@anthropic-ai/sdk';

	// Module-level cache so the SDK client is constructed at most once.
	let client = null;

	/**
	 * Return the shared Anthropic client, creating it on first use.
	 *
	 * The API key is read from the ANTHROPIC_API_KEY environment variable at
	 * the time the client is first created; later calls reuse the cached client.
	 *
	 * @returns {Anthropic} The cached client instance
	 * @throws {Error} If ANTHROPIC_API_KEY is unset or empty when the client is first needed
	 */
	function getClient() {
		// Fast path: a client has already been built.
		if ( client ) {
			return client;
		}

		const { ANTHROPIC_API_KEY: apiKey } = process.env;
		if ( !apiKey ) {
			throw new Error( 'ANTHROPIC_API_KEY environment variable is required for Claude question generation' );
		}

		client = new Anthropic( { apiKey } );
		return client;
	}

	/**
	 * Generate questions using Claude based on the full article text.
	 *
	 * The article is rebuilt from its chunks, cut down to a conservative size if
	 * it is very long, and sent to Claude with a prompt asking for short,
	 * curiosity-driven questions answerable from the article text alone.
	 *
	 * @param {Array} chunks - Article chunks with text and section info
	 * @param {string} articleTitle - The title of the article
	 * @param {number} numQuestions - Number of questions to generate (default: 5)
	 * @returns {Promise<string[]>} - At most numQuestions questions; empty when the
	 *   article has no content or no usable question lines came back
	 * @throws {Error} If ANTHROPIC_API_KEY is not configured, or the API request fails
	 */
	export async function generateQuestionsWithClaude( chunks, articleTitle, numQuestions = 5 ) {
		const anthropic = getClient();

		// Conservative input budget (in estimated tokens), kept well below the
		// model's context window so the prompt and response always fit.
		const maxArticleTokens = 50000;

		// Build a structured representation of the article
		const articleContent = buildArticleContent( chunks );

		// With no article text the model could only draw on outside knowledge,
		// which the prompt forbids, so skip the API call entirely.
		if ( articleContent.trim().length === 0 ) {
			console.warn( 'No article content available; skipping Claude question generation' );
			return [];
		}

		// Rough heuristic: about four characters per token.
		const estimatedTokens = Math.ceil( articleContent.length / 4 );
		console.log( `Article content: ~${ estimatedTokens } tokens estimated` );

		// If the article is very long, cut it down to the budget
		const contentToUse = estimatedTokens > maxArticleTokens
			? truncateArticleContent( chunks, maxArticleTokens )
			: articleContent;

		const prompt = `You are helping create an interactive Wikipedia reading experience. Given the following Wikipedia article about "${articleTitle}", generate ${numQuestions} short, simple questions that invite readers to explore the article.

	CRITICAL: Base questions ONLY on the provided article text.

	You must generate questions answerable using ONLY information in the article below. Do not use external knowledge. If you know facts about "${articleTitle}" not mentioned in this text, do NOT ask about them.

	Question style:

	- Keep it short - Questions should be 5-10 words. Simple, open-ended phrasing.
	- Use plain language - Write for casual readers, not academics.
	- Be inviting, not testing - Questions should spark curiosity, not feel like a quiz.

	Good examples:
	- "Why did Plato write about this?"
	- "What happened to the search expeditions?"
	- "How did this influence later writers?"

	Avoid:
	- Long, complex questions with multiple clauses
	- Academic or formal phrasing
	- Questions answered in the opening paragraph

	Content guidelines:

	- Look for interesting details deeper in the article, not just the lead
	- Reference specific things mentioned in the text
	- Vary the topics covered across your questions

	<article>
	${contentToUse}
	</article>

	Generate exactly ${numQuestions} questions, one per line. Output only the questions, no numbering. Keep each question short and simple.`;

		try {
			const response = await anthropic.messages.create( {
				model: 'claude-sonnet-4-5',
				max_tokens: 1024,
				messages: [
					{
						role: 'user',
						content: prompt
					}
				]
			} );

			const questions = extractQuestions( response );

			console.log( `Claude generated ${ questions.length } questions` );
			return questions.slice( 0, numQuestions );
		} catch ( error ) {
			console.error( 'Claude question generation failed:', error.message );
			throw error;
		}
	}

	/**
	 * Pull the question lines out of a Messages API response.
	 *
	 * Reads every text content block instead of assuming the first block is
	 * text, so an empty or non-text first block no longer throws.
	 *
	 * @param {Object} response - Response returned by anthropic.messages.create
	 * @returns {string[]} - Cleaned lines that look like questions
	 */
	function extractQuestions( response ) {
		const blocks = response && Array.isArray( response.content ) ? response.content : [];
		const text = blocks
			.filter( ( block ) => block && block.type === 'text' && typeof block.text === 'string' )
			.map( ( block ) => block.text )
			.join( '\n' );

		return text
			.split( '\n' )
			.map( ( line ) => normalizeQuestionLine( line ) )
			.filter( ( q ) => q.length > 10 && q.endsWith( '?' ) );
	}

	/**
	 * Strip formatting the model may add despite the instructions.
	 *
	 * Removes a leading bullet or "1." / "1)" style number, and one pair of
	 * wrapping quotes. The prompt's own examples are quoted, so echoed quotes
	 * would otherwise make the line fail the "ends with ?" filter.
	 *
	 * @param {string} line - One raw line of model output
	 * @returns {string} - The line without list markers or wrapping quotes
	 */
	function normalizeQuestionLine( line ) {
		let question = line.trim().replace( /^(?:[-*\u2022]|\d+[.)])\s+/, '' );

		// Only unwrap when the whole line is quoted, so quotes used inside a
		// question (e.g. around a title) are left alone.
		if ( /^["'\u201C\u2018].*["'\u201D\u2019]$/.test( question ) ) {
			question = question.slice( 1, -1 ).trim();
		}

		return question;
	}

	/**
	 * Build a structured text representation of the article from chunks.
	 *
	 * Chunks are grouped by section title, in order of each section's first
	 * appearance. Chunks without a title (missing or empty) are filed under
	 * "Introduction". Each section is rendered as a "## Title" header followed
	 * by its chunk texts separated by blank lines.
	 *
	 * @param {Array} chunks - Article chunks; each is read for `sectionTitle` and `text`
	 * @returns {string} - Formatted article content ('' when there are no chunks)
	 */
	function buildArticleContent( chunks ) {
		const sections = new Map();

		// Group chunk texts by section. A missing chunk list is treated as empty.
		for ( const chunk of chunks || [] ) {
			// `||` (not `??`) on purpose: an empty-string title also falls back.
			const sectionTitle = chunk.sectionTitle || 'Introduction';
			const texts = sections.get( sectionTitle );
			if ( texts ) {
				texts.push( chunk.text );
			} else {
				sections.set( sectionTitle, [ chunk.text ] );
			}
		}

		// Render each section as header, body, and a blank separator line.
		const parts = [];
		for ( const [ sectionTitle, texts ] of sections ) {
			parts.push( `## ${sectionTitle}\n`, texts.join( '\n\n' ), '' );
		}

		return parts.join( '\n' );
	}

	/**
	 * Truncate article content to fit within an estimated token budget.
	 *
	 * Sections are emitted whole, in order of first appearance, for as long as
	 * they fit. The first section that does not fit is cut to the remaining
	 * budget (only when a worthwhile amount of room is left) and everything
	 * after it is dropped. Token counts are estimated at four characters each.
	 *
	 * @param {Array} chunks - Article chunks; each is read for `sectionTitle` and `text`
	 * @param {number} maxTokens - Maximum estimated tokens
	 * @returns {string} - Truncated content ('' when nothing fits)
	 */
	function truncateArticleContent( chunks, maxTokens ) {
		const charsPerToken = 4;
		// A partial section is only worth including if more than this many
		// estimated tokens remain for its body.
		const minPartialTokens = 500;

		// Group chunk texts by section. A missing chunk list is treated as empty.
		const sections = new Map();
		for ( const chunk of chunks || [] ) {
			const sectionTitle = chunk.sectionTitle || 'Introduction';
			const texts = sections.get( sectionTitle );
			if ( texts ) {
				texts.push( chunk.text );
			} else {
				sections.set( sectionTitle, [ chunk.text ] );
			}
		}

		const parts = [];
		let estimatedTokens = 0;

		for ( const [ sectionTitle, texts ] of sections ) {
			const header = `## ${sectionTitle}\n`;
			const sectionContent = texts.join( '\n\n' );

			const headerTokens = Math.ceil( header.length / charsPerToken );
			const contentTokens = Math.ceil( sectionContent.length / charsPerToken );
			// Budget left for this section's body once its header is counted.
			const remainingTokens = maxTokens - estimatedTokens - headerTokens;

			if ( contentTokens < remainingTokens ) {
				// Whole section fits.
				parts.push( header, sectionContent, '' );
				estimatedTokens += headerTokens + contentTokens;
				continue;
			}

			if ( remainingTokens > minPartialTokens ) {
				let excerpt = sectionContent.slice( 0, remainingTokens * charsPerToken );

				// slice() counts UTF-16 code units, so the cut can land between
				// the two halves of a surrogate pair; drop a dangling high
				// surrogate rather than emit malformed text.
				const lastUnit = excerpt.charCodeAt( excerpt.length - 1 );
				if ( lastUnit >= 0xD800 && lastUnit <= 0xDBFF ) {
					excerpt = excerpt.slice( 0, -1 );
				}

				// Only mark the text as cut when something was actually removed.
				const wasCut = excerpt.length < sectionContent.length;
				parts.push( header, wasCut ? excerpt + '...' : excerpt, '' );
			}

			// Budget exhausted: drop this and all later sections.
			break;
		}

		return parts.join( '\n' );
	}

	/**
	 * Report whether Claude-based question generation can be used.
	 *
	 * Only checks that the ANTHROPIC_API_KEY environment variable holds a
	 * non-empty value; the key itself is not validated.
	 *
	 * @returns {boolean} - True if ANTHROPIC_API_KEY is set to a non-empty value
	 */
	export function isClaudeAvailable() {
		const { ANTHROPIC_API_KEY: apiKey } = process.env;
		return Boolean( apiKey );
	}