CognxSafeTrack commited on
Commit ·
abc4e24
1
Parent(s): 86262f4
feat: implement STT inbound audio, TTS fallback, and Interactive Lists
Browse files
apps/api/src/routes/ai.ts
CHANGED
|
@@ -64,16 +64,22 @@ export async function aiRoutes(fastify: FastifyInstance) {
|
|
| 64 |
});
|
| 65 |
|
| 66 |
// 4. Generate TTS Audio for a lesson
|
| 67 |
-
fastify.post('/tts', async (request) => {
|
| 68 |
const bodySchema = z.object({ text: z.string() });
|
| 69 |
const { text } = bodySchema.parse(request.body);
|
| 70 |
|
| 71 |
console.log(`Generating TTS audio...`);
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
});
|
| 78 |
|
| 79 |
// 5. Transcribe audio (called by Railway worker after downloading media from Meta)
|
|
|
|
| 64 |
});
|
| 65 |
|
| 66 |
// 4. Generate TTS Audio for a lesson
|
| 67 |
+
fastify.post('/tts', async (request, reply) => {
|
| 68 |
const bodySchema = z.object({ text: z.string() });
|
| 69 |
const { text } = bodySchema.parse(request.body);
|
| 70 |
|
| 71 |
console.log(`Generating TTS audio...`);
|
| 72 |
|
| 73 |
+
try {
|
| 74 |
+
const audioBuffer = await aiService.generateSpeech(text);
|
| 75 |
+
const downloadUrl = await uploadFile(audioBuffer, `lesson-audio-${Date.now()}.mp3`, 'audio/mpeg');
|
| 76 |
+
return { success: true, url: downloadUrl };
|
| 77 |
+
} catch (err: any) {
|
| 78 |
+
if (err?.name === 'QuotaExceededError') {
|
| 79 |
+
return reply.code(429).send({ error: 'quota_exceeded' });
|
| 80 |
+
}
|
| 81 |
+
throw err;
|
| 82 |
+
}
|
| 83 |
});
|
| 84 |
|
| 85 |
// 5. Transcribe audio (called by Railway worker after downloading media from Meta)
|
apps/api/src/routes/internal.ts
CHANGED
|
@@ -7,7 +7,7 @@ import { WhatsAppService } from '../services/whatsapp';
|
|
| 7 |
*/
|
| 8 |
export async function internalRoutes(fastify: FastifyInstance) {
|
| 9 |
fastify.post<{
|
| 10 |
-
Body: { phone: string; text: string }
|
| 11 |
}>('/v1/internal/handle-message', {
|
| 12 |
config: { requireAuth: true } as any,
|
| 13 |
schema: {
|
|
@@ -16,18 +16,19 @@ export async function internalRoutes(fastify: FastifyInstance) {
|
|
| 16 |
required: ['phone', 'text'],
|
| 17 |
properties: {
|
| 18 |
phone: { type: 'string' },
|
| 19 |
-
text: { type: 'string' }
|
|
|
|
| 20 |
}
|
| 21 |
}
|
| 22 |
}
|
| 23 |
}, async (request, reply) => {
|
| 24 |
-
const { phone, text } = request.body;
|
| 25 |
-
if (!phone ||
|
| 26 |
return reply.code(400).send({ error: 'phone and text are required' });
|
| 27 |
}
|
| 28 |
|
| 29 |
// Fire and return — processing is async
|
| 30 |
-
WhatsAppService.handleIncomingMessage(phone, text).catch((err: Error) => {
|
| 31 |
request.log.error(`[INTERNAL] handleIncomingMessage error: ${err.message}`);
|
| 32 |
});
|
| 33 |
|
|
|
|
| 7 |
*/
|
| 8 |
export async function internalRoutes(fastify: FastifyInstance) {
|
| 9 |
fastify.post<{
|
| 10 |
+
Body: { phone: string; text: string; audioUrl?: string }
|
| 11 |
}>('/v1/internal/handle-message', {
|
| 12 |
config: { requireAuth: true } as any,
|
| 13 |
schema: {
|
|
|
|
| 16 |
required: ['phone', 'text'],
|
| 17 |
properties: {
|
| 18 |
phone: { type: 'string' },
|
| 19 |
+
text: { type: 'string' },
|
| 20 |
+
audioUrl: { type: 'string' }
|
| 21 |
}
|
| 22 |
}
|
| 23 |
}
|
| 24 |
}, async (request, reply) => {
|
| 25 |
+
const { phone, text, audioUrl } = request.body;
|
| 26 |
+
if (!phone || text === undefined) {
|
| 27 |
return reply.code(400).send({ error: 'phone and text are required' });
|
| 28 |
}
|
| 29 |
|
| 30 |
// Fire and return — processing is async
|
| 31 |
+
WhatsAppService.handleIncomingMessage(phone, text, audioUrl).catch((err: Error) => {
|
| 32 |
request.log.error(`[INTERNAL] handleIncomingMessage error: ${err.message}`);
|
| 33 |
});
|
| 34 |
|
apps/api/src/services/ai/openai-provider.ts
CHANGED
|
@@ -79,13 +79,20 @@ export class OpenAIProvider implements LLMProvider {
|
|
| 79 |
async generateSpeech(text: string): Promise<Buffer> {
|
| 80 |
console.log('[OPENAI] Generating speech TTS...');
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
}
|
| 90 |
}
|
| 91 |
|
|
|
|
| 79 |
async generateSpeech(text: string): Promise<Buffer> {
|
| 80 |
console.log('[OPENAI] Generating speech TTS...');
|
| 81 |
|
| 82 |
+
try {
|
| 83 |
+
const mp3 = await this.openai.audio.speech.create({
|
| 84 |
+
model: 'tts-1',
|
| 85 |
+
voice: 'alloy',
|
| 86 |
+
input: text,
|
| 87 |
+
});
|
| 88 |
+
return Buffer.from(await mp3.arrayBuffer());
|
| 89 |
+
} catch (err: any) {
|
| 90 |
+
if (err?.status === 429 || err?.code === 'insufficient_quota') {
|
| 91 |
+
console.warn('[OPENAI] 429 on generateSpeech');
|
| 92 |
+
throw new QuotaExceededError();
|
| 93 |
+
}
|
| 94 |
+
throw err;
|
| 95 |
+
}
|
| 96 |
}
|
| 97 |
}
|
| 98 |
|
apps/api/src/services/whatsapp.ts
CHANGED
|
@@ -3,9 +3,9 @@ import { scheduleMessage, enrollUser, whatsappQueue, scheduleTrackDay, scheduleI
|
|
| 3 |
import { QuotaExceededError } from './ai/openai-provider';
|
| 4 |
|
| 5 |
export class WhatsAppService {
|
| 6 |
-
static async handleIncomingMessage(phone: string, text: string) {
|
| 7 |
const normalizedText = text.trim().toUpperCase();
|
| 8 |
-
console.log(`Received message from ${phone}: ${normalizedText}`);
|
| 9 |
|
| 10 |
// 1. Find or Create User
|
| 11 |
let user = await prisma.user.findUnique({ where: { phone } });
|
|
@@ -27,6 +27,20 @@ export class WhatsAppService {
|
|
| 27 |
}
|
| 28 |
}
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
// 1.5. Testing / Cheat Codes (Only for registered users)
|
| 31 |
if (normalizedText === 'INSCRIPTION') {
|
| 32 |
await prisma.enrollment.deleteMany({ where: { userId: user.id } });
|
|
@@ -64,8 +78,8 @@ export class WhatsAppService {
|
|
| 64 |
}
|
| 65 |
|
| 66 |
// ─── Interactive LIST action IDs ──────────────────────────────────────
|
| 67 |
-
// Format: DAY{N}_EXERCISE | DAY{N}_REPLAY | DAY{N}_CONTINUE
|
| 68 |
-
const dayActionMatch = normalizedText.match(/^DAY(\d+)_(EXERCISE|REPLAY|CONTINUE)$/);
|
| 69 |
if (dayActionMatch) {
|
| 70 |
const action = dayActionMatch[2];
|
| 71 |
|
|
@@ -87,10 +101,25 @@ export class WhatsAppService {
|
|
| 87 |
return;
|
| 88 |
} else if (action === 'EXERCISE') {
|
| 89 |
await scheduleMessage(user.id, user.language === 'WOLOF'
|
| 90 |
-
? "🎙️ Yónnee sa tontu (audio walla
|
| 91 |
: "🎙️ Envoie ta réponse (audio ou texte) :"
|
| 92 |
);
|
| 93 |
return;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
}
|
| 95 |
// CONTINUE → fall through to SUITE logic below
|
| 96 |
}
|
|
|
|
| 3 |
import { QuotaExceededError } from './ai/openai-provider';
|
| 4 |
|
| 5 |
export class WhatsAppService {
|
| 6 |
+
static async handleIncomingMessage(phone: string, text: string, audioUrl?: string) {
|
| 7 |
const normalizedText = text.trim().toUpperCase();
|
| 8 |
+
console.log(`Received message from ${phone}: ${normalizedText} (Audio: ${audioUrl || 'N/A'})`);
|
| 9 |
|
| 10 |
// 1. Find or Create User
|
| 11 |
let user = await prisma.user.findUnique({ where: { phone } });
|
|
|
|
| 27 |
}
|
| 28 |
}
|
| 29 |
|
| 30 |
+
// 1.2 Log the incoming message in the DB
|
| 31 |
+
try {
|
| 32 |
+
await prisma.message.create({
|
| 33 |
+
data: {
|
| 34 |
+
content: text,
|
| 35 |
+
mediaUrl: audioUrl,
|
| 36 |
+
direction: 'INBOUND',
|
| 37 |
+
userId: user.id
|
| 38 |
+
}
|
| 39 |
+
});
|
| 40 |
+
} catch (err: any) {
|
| 41 |
+
console.error('[WhatsAppService] Failed to log incoming message:', err.message);
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
// 1.5. Testing / Cheat Codes (Only for registered users)
|
| 45 |
if (normalizedText === 'INSCRIPTION') {
|
| 46 |
await prisma.enrollment.deleteMany({ where: { userId: user.id } });
|
|
|
|
| 78 |
}
|
| 79 |
|
| 80 |
// ─── Interactive LIST action IDs ──────────────────────────────────────
|
| 81 |
+
// Format: DAY{N}_EXERCISE | DAY{N}_REPLAY | DAY{N}_CONTINUE | DAY{N}_PROMPT
|
| 82 |
+
const dayActionMatch = normalizedText.match(/^DAY(\d+)_(EXERCISE|REPLAY|CONTINUE|PROMPT)$/);
|
| 83 |
if (dayActionMatch) {
|
| 84 |
const action = dayActionMatch[2];
|
| 85 |
|
|
|
|
| 101 |
return;
|
| 102 |
} else if (action === 'EXERCISE') {
|
| 103 |
await scheduleMessage(user.id, user.language === 'WOLOF'
|
| 104 |
+
? "🎙️ Yónnee sa tontu (audio walla bind) :"
|
| 105 |
: "🎙️ Envoie ta réponse (audio ou texte) :"
|
| 106 |
);
|
| 107 |
return;
|
| 108 |
+
} else if (action === 'PROMPT') {
|
| 109 |
+
const enrollment = await prisma.enrollment.findFirst({
|
| 110 |
+
where: { userId: user.id, status: 'ACTIVE' }
|
| 111 |
+
});
|
| 112 |
+
if (enrollment) {
|
| 113 |
+
const trackDay = await prisma.trackDay.findFirst({
|
| 114 |
+
where: { trackId: enrollment.trackId, dayNumber: enrollment.currentDay }
|
| 115 |
+
});
|
| 116 |
+
if (trackDay?.exercisePrompt) {
|
| 117 |
+
await scheduleMessage(user.id, trackDay.exercisePrompt);
|
| 118 |
+
} else {
|
| 119 |
+
await scheduleMessage(user.id, user.language === 'WOLOF' ? "Amul lëjj" : "Pas d'exercice pour ce jour");
|
| 120 |
+
}
|
| 121 |
+
}
|
| 122 |
+
return;
|
| 123 |
}
|
| 124 |
// CONTINUE → fall through to SUITE logic below
|
| 125 |
}
|
apps/whatsapp-worker/src/index.ts
CHANGED
|
@@ -120,19 +120,26 @@ const worker = new Worker('whatsapp-queue', async (job: Job) => {
|
|
| 120 |
console.log(`[WORKER] Downloading media ${mediaId} for ${phone}...`);
|
| 121 |
|
| 122 |
let transcribedText = '';
|
|
|
|
| 123 |
try {
|
| 124 |
const { buffer } = await downloadMedia(mediaId, accessToken);
|
| 125 |
|
| 126 |
-
//
|
| 127 |
-
// No direct require — that path doesn't exist in the worker Docker image
|
| 128 |
const API_URL_STORE = process.env.API_URL || 'http://localhost:3001';
|
| 129 |
const apiKeyStore = process.env.ADMIN_API_KEY || '';
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
// Transcribe with Whisper via API
|
| 138 |
const API_URL = process.env.API_URL || 'http://localhost:3001';
|
|
@@ -148,17 +155,15 @@ const worker = new Worker('whatsapp-queue', async (job: Job) => {
|
|
| 148 |
transcribedText = data.text || '';
|
| 149 |
console.log(`[WORKER] ✅ Transcribed: "${transcribedText.substring(0, 80)}"`);
|
| 150 |
} else if (transcribeRes.status === 429) {
|
| 151 |
-
// OpenAI quota exceeded — send fallback and requeue
|
| 152 |
const user = await prisma.user.findFirst({ where: { phone } });
|
| 153 |
if (user) {
|
| 154 |
await sendTextMessage(phone, user.language === 'WOLOF'
|
| 155 |
-
? "Bañ ma dégg sa kàddu léegi. Yónnee ci
|
| 156 |
-
: "Je n'arrive pas à analyser l'audio maintenant.
|
| 157 |
);
|
| 158 |
}
|
| 159 |
-
//
|
| 160 |
-
const requeueQ = new Queue('whatsapp-queue', { connection: connection as any });
|
| 161 |
-
await requeueQ.add('download-media', job.data, { delay: 120_000, priority: 1 });
|
| 162 |
return;
|
| 163 |
}
|
| 164 |
} catch (err: any) {
|
|
@@ -174,7 +179,7 @@ const worker = new Worker('whatsapp-queue', async (job: Job) => {
|
|
| 174 |
await fetch(`${API_URL}/v1/internal/handle-message`, {
|
| 175 |
method: 'POST',
|
| 176 |
headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` },
|
| 177 |
-
body: JSON.stringify({ phone, text: transcribedText })
|
| 178 |
}).catch(e => console.error('[WORKER] handle-message call failed:', e.message));
|
| 179 |
}
|
| 180 |
}
|
|
|
|
| 120 |
console.log(`[WORKER] Downloading media ${mediaId} for ${phone}...`);
|
| 121 |
|
| 122 |
let transcribedText = '';
|
| 123 |
+
let audioUrl = '';
|
| 124 |
try {
|
| 125 |
const { buffer } = await downloadMedia(mediaId, accessToken);
|
| 126 |
|
| 127 |
+
// Store audio on R2 via the API
|
|
|
|
| 128 |
const API_URL_STORE = process.env.API_URL || 'http://localhost:3001';
|
| 129 |
const apiKeyStore = process.env.ADMIN_API_KEY || '';
|
| 130 |
+
try {
|
| 131 |
+
const storeRes = await fetch(`${API_URL_STORE}/v1/ai/store-audio`, {
|
| 132 |
+
method: 'POST',
|
| 133 |
+
headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKeyStore}` },
|
| 134 |
+
body: JSON.stringify({ audioBase64: buffer.toString('base64'), mimeType, phone })
|
| 135 |
+
});
|
| 136 |
+
if (storeRes.ok) {
|
| 137 |
+
const storeData = await storeRes.json() as any;
|
| 138 |
+
if (storeData.url) audioUrl = storeData.url;
|
| 139 |
+
}
|
| 140 |
+
} catch (err) {
|
| 141 |
+
console.error('[WORKER] store-audio failed to save inbound audio archive (non-blocking)');
|
| 142 |
+
}
|
| 143 |
|
| 144 |
// Transcribe with Whisper via API
|
| 145 |
const API_URL = process.env.API_URL || 'http://localhost:3001';
|
|
|
|
| 155 |
transcribedText = data.text || '';
|
| 156 |
console.log(`[WORKER] ✅ Transcribed: "${transcribedText.substring(0, 80)}"`);
|
| 157 |
} else if (transcribeRes.status === 429) {
|
| 158 |
+
// OpenAI quota exceeded — send fallback and do NOT requeue (requested by user to not block)
|
| 159 |
const user = await prisma.user.findFirst({ where: { phone } });
|
| 160 |
if (user) {
|
| 161 |
await sendTextMessage(phone, user.language === 'WOLOF'
|
| 162 |
+
? "Bañ ma dégg sa kàddu léegi. Yónnee sa tontu ci bind (texte)."
|
| 163 |
+
: "Je n'arrive pas à analyser l'audio maintenant. Envoie ta réponse en texte (1 phrase)."
|
| 164 |
);
|
| 165 |
}
|
| 166 |
+
// Stop processing. The user will still be PENDING in DB, waiting for text reply.
|
|
|
|
|
|
|
| 167 |
return;
|
| 168 |
}
|
| 169 |
} catch (err: any) {
|
|
|
|
| 179 |
await fetch(`${API_URL}/v1/internal/handle-message`, {
|
| 180 |
method: 'POST',
|
| 181 |
headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` },
|
| 182 |
+
body: JSON.stringify({ phone, text: transcribedText, audioUrl })
|
| 183 |
}).catch(e => console.error('[WORKER] handle-message call failed:', e.message));
|
| 184 |
}
|
| 185 |
}
|
apps/whatsapp-worker/src/pedagogy.ts
CHANGED
|
@@ -101,16 +101,21 @@ export async function sendLessonDay(userId: string, trackId: string, dayNumber:
|
|
| 101 |
[{
|
| 102 |
title: isWolof ? "Jëfandikoo" : "Actions",
|
| 103 |
rows: [
|
| 104 |
-
{
|
| 105 |
-
id: `DAY${dayNumber}_EXERCISE`,
|
| 106 |
-
title: isWolof ? "🎙️ Yónni tontu" : "🎙️ Répondre",
|
| 107 |
-
description: isWolof ? "Yónnee sa tontu ji" : "Envoyer ta réponse à l'exercice"
|
| 108 |
-
},
|
| 109 |
{
|
| 110 |
id: `DAY${dayNumber}_REPLAY`,
|
| 111 |
-
title: isWolof ? "
|
| 112 |
description: isWolof ? "Waxtu bi ci kaw" : "Réécouter la leçon"
|
| 113 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
{
|
| 115 |
id: `DAY${dayNumber}_CONTINUE`,
|
| 116 |
title: isWolof ? "⏭️ Ci kanam" : "⏭️ Passer",
|
|
|
|
| 101 |
[{
|
| 102 |
title: isWolof ? "Jëfandikoo" : "Actions",
|
| 103 |
rows: [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
{
|
| 105 |
id: `DAY${dayNumber}_REPLAY`,
|
| 106 |
+
title: isWolof ? "🎧 Dégg ci kanam" : "🎧 Réécouter",
|
| 107 |
description: isWolof ? "Waxtu bi ci kaw" : "Réécouter la leçon"
|
| 108 |
},
|
| 109 |
+
{
|
| 110 |
+
id: `DAY${dayNumber}_PROMPT`,
|
| 111 |
+
title: isWolof ? "📝 Lëjj bi" : "📝 Faire l'exercice",
|
| 112 |
+
description: isWolof ? "Jàngaat laaj bi" : "Relire la question"
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
id: `DAY${dayNumber}_EXERCISE`,
|
| 116 |
+
title: isWolof ? "🎙️ Yónni tontu" : "🎙️ Répondre",
|
| 117 |
+
description: isWolof ? "Yónnee sa tontu ji" : "Envoyer ta réponse"
|
| 118 |
+
},
|
| 119 |
{
|
| 120 |
id: `DAY${dayNumber}_CONTINUE`,
|
| 121 |
title: isWolof ? "⏭️ Ci kanam" : "⏭️ Passer",
|
packages/database/prisma/schema.prisma
CHANGED
|
@@ -113,6 +113,8 @@ model Message {
|
|
| 113 |
userId String
|
| 114 |
direction Direction // INBOUND, OUTBOUND
|
| 115 |
channel String @default("WHATSAPP")
|
|
|
|
|
|
|
| 116 |
payload Json? // Raw payload from provider
|
| 117 |
createdAt DateTime @default(now())
|
| 118 |
|
|
|
|
| 113 |
userId String
|
| 114 |
direction Direction // INBOUND, OUTBOUND
|
| 115 |
channel String @default("WHATSAPP")
|
| 116 |
+
content String?
|
| 117 |
+
mediaUrl String?
|
| 118 |
payload Json? // Raw payload from provider
|
| 119 |
createdAt DateTime @default(now())
|
| 120 |
|