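// AI inference proxy: forwards chat prompts to the Hugging Face router and
// degrades to a rule-based fallback when no HF_API_KEY is configured or the
// upstream call fails. Assumes Node 18+ so the global fetch API is available.
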
import express from 'express';
import cors from 'cors';

const app = express();
const PORT = 3000;

app.use(cors());
// Allow large JSON bodies so long prompts are accepted
app.use(express.json({ limit: '10mb' }));
// Health check: reports whether an API key is configured without exposing it
app.get('/api/health', (req, res) => {
  res.json({
    status: 'ok',
    service: 'ai-inference-proxy',
    hasKey: !!process.env.HF_API_KEY
  });
});
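// Example health check (hypothetical local run on the default port):
//   curl http://localhost:3000/api/health
//   -> {"status":"ok","service":"ai-inference-proxy","hasKey":false}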
// AI inference proxy endpoint
app.post('/api/inference', async (req, res) => {
  const apiKey = process.env.HF_API_KEY;
  const { prompt } = req.body || {};

  if (!prompt || typeof prompt !== 'string' || !prompt.trim()) {
    return res.status(400).json({ error: 'Prompt is required' });
  }

  // Without a key, fall back gracefully instead of failing
  if (!apiKey) {
    console.log('No HF_API_KEY set -> using fallback');
    return res.status(200).json({ fallback: true, message: 'Using rule-based recommendations' });
  }
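  // Note: fallback responses deliberately use HTTP 200 so the browser client
  // can branch on the `fallback` flag instead of treating them as errors.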
  try {
    console.log('Calling HF Inference Providers API (router)...');

    // Direct HTTP call to the router's OpenAI-compatible chat completions endpoint
    const response = await fetch('https://router.huggingface.co/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'meta-llama/Llama-3.2-3B-Instruct',
        messages: [
          { role: 'user', content: prompt }
        ],
        max_tokens: 512,
        temperature: 0.2,
        stream: false
      }),
    });
    if (!response.ok) {
      const errorText = await response.text();
      console.error('HF Router API error:', response.status, response.statusText);
      console.error('Response body:', errorText);
      return res.status(200).json({
        fallback: true,
        error: 'HF Router API error',
        status: response.status,
        details: errorText
      });
    }

    const data = await response.json();
    console.log('HF Inference Providers API response received');

    // Reshape the chat completion into the [{ generated_text }] format the client expects
    const generatedText = data.choices?.[0]?.message?.content ?? '';
    return res.status(200).json([{ generated_text: generatedText }]);
  } catch (e) {
    // Pull whatever detail is available: fetch rejections may expose a cause,
    // and some error shapes carry a response object, so check both defensively
    const status = e?.cause?.status || e?.response?.status;
    const body = e?.cause?.response?.text ? await e.cause.response.text()
      : e?.response?.text ? await e.response.text()
      : undefined;

    console.error('Proxy error:', e);
    console.error('Status:', status);
    if (body) console.error('Body:', body);

    // Graceful fallback so the UI still works
    return res.status(200).json({
      fallback: true,
      error: 'HF call failed',
      status,
      details: body || e.message || 'Unknown error'
    });
  }
});
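// Example request (hypothetical; the response is one of the two shapes
// returned above, depending on whether the upstream call succeeded):
//   curl -X POST http://localhost:3000/api/inference \
//     -H 'Content-Type: application/json' \
//     -d '{"prompt":"Recommend a warm-up routine"}'
//   -> [{"generated_text":"..."}]     (successful HF call)
//   -> {"fallback":true, ...}         (no key / upstream error)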
app.listen(PORT, '0.0.0.0', () => {
  console.log(`βœ… AI inference proxy running on port ${PORT}`);
  console.log(`Node version: ${process.version}`);
  console.log(`Model: meta-llama/Llama-3.2-3B-Instruct`);
  console.log(`API key configured: ${!!process.env.HF_API_KEY}`);
});
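// Hypothetical startup (entry filename assumed; adjust to this project's):
//   HF_API_KEY=hf_xxxxxxxx node server.js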