| |
| |
| |
| |
| |
| |
| import { |
| App, |
| type McpUiHostContext, |
| applyDocumentTheme, |
| } from "@modelcontextprotocol/ext-apps"; |
| import "./global.css"; |
| import "./mcp-app.css"; |
|
|
/**
 * Console logger for this module. Each method is the matching `console`
 * method pre-bound with the "[Transcript]" tag as its first argument.
 */
const log = (() => {
  const tag = "[Transcript]";
  return {
    info: console.log.bind(console, tag),
    warn: console.warn.bind(console, tag),
    error: console.error.bind(console, tag),
  };
})();
|
|
| |
| |
| |
|
|
/**
 * Looks up an element that the UI cannot work without.
 *
 * @param selector CSS selector passed to `document.querySelector`.
 * @returns The first matching element.
 * @throws Error naming the selector when nothing matches, so a markup
 *   mismatch fails at load time instead of as a later null dereference.
 */
function requireElement<T extends HTMLElement>(selector: string): T {
  const found = document.querySelector<T>(selector);
  if (found === null) {
    throw new Error(`[Transcript] Required element not found: ${selector}`);
  }
  return found;
}

// Root container; receives safe-area padding from the host context.
const mainEl = requireElement<HTMLElement>(".transcript-app");
// Level meter wrapper and its fill bar.
const levelBarEl = requireElement<HTMLElement>("#level-bar");
const micLevelEl = requireElement<HTMLElement>("#mic-level");
// Elapsed-time display.
const timerEl = requireElement<HTMLElement>("#timer");
// Container holding transcript entries.
const transcriptEl = requireElement<HTMLElement>("#transcript");
// Control buttons.
const startBtn = requireElement<HTMLElement>("#start-btn");
const copyBtn = requireElement<HTMLElement>("#copy-btn");
const clearBtn = requireElement<HTMLElement>("#clear-btn");
const sendBtn = requireElement<HTMLButtonElement>("#send-btn");
|
|
| |
| |
| |
|
|
// --- Session state ---------------------------------------------------------

/** True between startListening() and stopListening(). */
let isListening = false;
/** Offset into the list of final entries at which unsent entries begin. */
let lastSentIndex = 0;

// --- Timer state -----------------------------------------------------------

/** `Date.now()` value captured when the timer was last started. */
let timerStart: number | null = null;
/** Handle returned by `window.setInterval` while the timer is ticking. */
let timerInterval: number | null = null;

// --- Audio level metering state -------------------------------------------

/** Audio context backing the level meter. */
let audioContext: AudioContext | null = null;
/** Microphone stream obtained from `getUserMedia`. */
let micStream: MediaStream | null = null;
/** Analyser node read by updateAudioLevels(). */
let micAnalyser: AnalyserNode | null = null;
/** Handle of the pending `requestAnimationFrame` callback. */
let animationFrame: number | null = null;

// --- Speech recognition state ----------------------------------------------

/** Active speech recognizer, or null when none is running. */
let recognition: SpeechRecognition | null = null;
|
|
| |
| |
| |
|
|
/** App instance used to talk to the host. */
const app = new App({ name: "Live Transcript", version: "1.0.0" });

// On teardown, stop any listening session and reply with an empty result.
app.onteardown = async () => {
  log.info("App teardown");
  stopListening();
  return {};
};

// Errors reported by the app go to the tagged error logger.
app.onerror = log.error;

// Apply the host's safe-area insets as padding and its theme, when provided.
app.onhostcontextchanged = (ctx: McpUiHostContext) => {
  const insets = ctx.safeAreaInsets;
  if (insets) {
    const { top, right, bottom, left } = insets;
    const { style } = mainEl;
    style.paddingTop = `${top}px`;
    style.paddingRight = `${right}px`;
    style.paddingBottom = `${bottom}px`;
    style.paddingLeft = `${left}px`;
  }

  const theme = ctx.theme;
  if (theme) {
    applyDocumentTheme(theme);
  }
};
|
|
| |
| |
| |
|
|
/**
 * Opens the microphone and wires it to an analyser for the level meter.
 *
 * The audio context is stored in module state before the permission await so
 * that stopAudioCapture() can close it if capture is cancelled meanwhile.
 * If the module-level context is no longer the one created here once the
 * stream arrives (capture was stopped or restarted during the await), the
 * newly obtained stream is stopped immediately instead of being kept.
 *
 * @returns `true` when capture is running; `false` on any failure or when
 *   capture was cancelled while waiting for the microphone.
 */
async function startAudioCapture(): Promise<boolean> {
  try {
    const ctx = new AudioContext();
    audioContext = ctx;

    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    if (audioContext !== ctx) {
      // Torn down or superseded while the permission prompt was open:
      // release the microphone rather than leaving its tracks running.
      stream.getTracks().forEach((track) => track.stop());
      log.warn("Audio capture cancelled before the microphone was ready");
      return false;
    }
    micStream = stream;

    const source = ctx.createMediaStreamSource(stream);
    const analyser = ctx.createAnalyser();
    analyser.fftSize = 256;
    source.connect(analyser);
    micAnalyser = analyser;

    // Kick off the requestAnimationFrame loop that paints the level bar.
    updateAudioLevels();
    log.info("Audio capture started");
    return true;
  } catch (e) {
    log.error("Failed to start audio capture:", e);
    return false;
  }
}
|
|
/**
 * Paints the microphone level bar and reschedules itself for the next frame.
 *
 * While listening with an analyser available, the bar width is the mean of
 * the analyser's byte frequency data scaled so that a mean of 128 maps to
 * 100%, capped at 100%. Otherwise the bar is set to 0%.
 */
function updateAudioLevels() {
  let level = 0;

  if (micAnalyser && isListening) {
    const bins = new Uint8Array(micAnalyser.frequencyBinCount);
    micAnalyser.getByteFrequencyData(bins);

    let sum = 0;
    for (const value of bins) {
      sum += value;
    }
    level = Math.min(100, (sum / bins.length / 128) * 100);
  }

  micLevelEl.style.width = `${level}%`;
  animationFrame = requestAnimationFrame(updateAudioLevels);
}
|
|
/**
 * Tears down everything startAudioCapture() set up: cancels the pending
 * animation frame, stops the microphone tracks, drops the analyser, closes
 * the audio context, and resets the level bar to 0%.
 * Safe to call when nothing is running.
 */
function stopAudioCapture() {
  if (animationFrame !== null) {
    cancelAnimationFrame(animationFrame);
    animationFrame = null;
  }

  if (micStream) {
    for (const track of micStream.getTracks()) {
      track.stop();
    }
    micStream = null;
  }

  // Drop the analyser so it is not kept alive past the context it belongs to.
  micAnalyser = null;

  if (audioContext) {
    // close() returns a promise; handle rejection so it is not left floating.
    audioContext.close().catch((e: unknown) => {
      log.warn("Failed to close audio context:", e);
    });
    audioContext = null;
  }

  micLevelEl.style.width = "0%";
}
|
|
| |
| |
| |
|
|
/**
 * Creates a continuous, interim-result speech recognizer (en-US), wires its
 * event handlers, stores it in `recognition`, and starts it.
 *
 * Final results become transcript entries and refresh the send button and
 * model context; non-final results update the interim entry. When the
 * recognizer ends while still listening it is restarted, but only if it is
 * still the active instance.
 *
 * @returns `true` if recognition was started; `false` if no recognizer
 *   constructor is available or `start()` throws.
 */
function startSpeechRecognition(): boolean {
  const SpeechRecognitionCtor =
    window.SpeechRecognition || window.webkitSpeechRecognition;

  if (!SpeechRecognitionCtor) {
    log.warn("Speech recognition not supported");
    return false;
  }

  const instance: SpeechRecognition = new SpeechRecognitionCtor();
  instance.continuous = true;
  instance.interimResults = true;
  instance.lang = "en-US";
  recognition = instance;

  instance.onstart = () => {
    log.info("Speech recognition started");
  };

  instance.onresult = (event) => {
    const e = event as SpeechRecognitionEvent;
    for (let i = e.resultIndex; i < e.results.length; i++) {
      const result = e.results[i];
      const transcript = result[0].transcript;

      if (result.isFinal) {
        addTranscriptEntry(transcript, true);
        updateSendButton();
        updateModelContext();
      } else {
        updateInterimTranscript(transcript);
      }
    }
  };

  instance.onerror = (event) => {
    const e = event as SpeechRecognitionErrorEvent;
    log.error("Speech recognition error:", e.error);
    // Both codes mean recognition cannot proceed; stop instead of letting
    // onend restart the recognizer in an endless error loop.
    if (e.error === "not-allowed") {
      addTranscriptEntry("Microphone access denied", true);
      stopListening();
    } else if (e.error === "service-not-allowed") {
      addTranscriptEntry("Speech recognition not available", true);
      stopListening();
    }
  };

  instance.onend = () => {
    log.info("Speech recognition ended");
    // A recognizer that has been replaced or cleared must not restart
    // whatever recognizer is current now.
    if (!isListening || recognition !== instance) return;

    try {
      instance.start();
    } catch (e) {
      log.warn("Failed to restart speech recognition:", e);
    }
  };

  try {
    instance.start();
    return true;
  } catch (e) {
    log.error("Failed to start speech recognition:", e);
    return false;
  }
}
|
|
/**
 * Stops the active recognizer, if any, and clears the module reference.
 * An exception thrown by `stop()` is ignored.
 */
function stopSpeechRecognition() {
  const active = recognition;
  if (!active) return;

  try {
    active.stop();
  } catch {
    // Ignored: the reference is cleared below regardless.
  } finally {
    recognition = null;
  }
}
|
|
| |
| |
| |
|
|
/** Removes the first placeholder element from the transcript, if present. */
function clearTranscriptPlaceholder() {
  transcriptEl.querySelector(".transcript-placeholder")?.remove();
}
|
|
/**
 * Formats a duration as `M:SS` (minutes are not capped at 59).
 *
 * @param seconds Duration in seconds. Fractions are floored; negative or
 *   non-finite values are treated as 0 so the output is always well-formed.
 * @returns The formatted duration, e.g. `65` gives `"1:05"`.
 */
function formatTime(seconds: number): string {
  const total = Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : 0;
  const mins = Math.floor(total / 60);
  const secs = total % 60;
  return `${mins}:${String(secs).padStart(2, "0")}`;
}
|
|
/**
 * Resets the timer display to "0:00", marks it active, and starts a
 * one-second interval that shows the whole seconds elapsed since now.
 */
function startTimer() {
  timerStart = Date.now();
  timerEl.textContent = "0:00";
  timerEl.classList.add("active");

  const tick = () => {
    if (!timerStart) return;
    const elapsedSeconds = Math.floor((Date.now() - timerStart) / 1000);
    timerEl.textContent = formatTime(elapsedSeconds);
  };
  timerInterval = window.setInterval(tick, 1000);
}
|
|
/**
 * Removes the timer's active styling and cancels the tick interval if one is
 * running. The displayed time is left as it was.
 */
function stopTimer() {
  timerEl.classList.remove("active");

  if (!timerInterval) return;
  clearInterval(timerInterval);
  timerInterval = null;
}
|
|
/**
 * Appends a timestamped entry to the transcript.
 *
 * Whitespace-only text is ignored. The placeholder and any existing interim
 * entry are removed first.
 *
 * @param text Entry text; HTML-escaped before insertion.
 * @param isFinal When false, the entry also gets the `interim` class.
 */
function addTranscriptEntry(text: string, isFinal: boolean) {
  if (text.trim() === "") return;

  clearTranscriptPlaceholder();
  transcriptEl.querySelector(".transcript-entry.interim")?.remove();

  const stamp = new Date().toLocaleTimeString();
  const paragraph = document.createElement("p");
  paragraph.className = isFinal
    ? "transcript-entry"
    : "transcript-entry interim";
  paragraph.innerHTML = `<div class="timestamp">${stamp}</div>${escapeHtml(text)}`;
  transcriptEl.appendChild(paragraph);
}
|
|
/**
 * Shows in-progress recognition text in the single interim entry, creating
 * that entry at the end of the transcript if it does not exist yet.
 *
 * @param text Interim text; HTML-escaped before insertion.
 */
function updateInterimTranscript(text: string) {
  clearTranscriptPlaceholder();

  let target = transcriptEl.querySelector<HTMLElement>(
    ".transcript-entry.interim",
  );
  if (target === null) {
    target = document.createElement("p");
    target.className = "transcript-entry interim";
    transcriptEl.appendChild(target);
  }

  const stamp = new Date().toLocaleTimeString();
  target.innerHTML = `<div class="timestamp">${stamp}</div>${escapeHtml(text)}`;
}
|
|
/**
 * Escapes text for use in HTML markup by placing it in a detached element as
 * a text node and reading back the serialized `innerHTML`.
 */
function escapeHtml(text: string): string {
  const scratch = document.createElement("div");
  scratch.appendChild(document.createTextNode(text));
  return scratch.innerHTML;
}
|
|
/**
 * Renders one transcript entry as plain text.
 *
 * @param entry Transcript entry element.
 * @returns `"[timestamp] text"`, or just the text when the entry has no
 *   non-empty timestamp, or `""` when the entry has no text besides it.
 */
function formatEntry(entry: HTMLElement): string {
  const stamp = entry.querySelector(".timestamp")?.textContent?.trim();

  // Work on a copy so the timestamp can be stripped without touching the DOM.
  const copy = entry.cloneNode(true) as HTMLElement;
  copy.querySelector(".timestamp")?.remove();

  const body = (copy.textContent ?? "").trim();
  if (body === "") return "";
  if (!stamp) return body;
  return `[${stamp}] ${body}`;
}
|
|
/**
 * Formats each entry with formatEntry(), drops the empty results, and joins
 * the rest with newlines.
 */
function formatEntries(entries: HTMLElement[]): string {
  const lines: string[] = [];
  for (const entry of entries) {
    const line = formatEntry(entry);
    if (line) {
      lines.push(line);
    }
  }
  return lines.join("\n");
}
|
|
/** Returns every non-interim transcript entry, in document order. */
function getAllEntries(): HTMLElement[] {
  const finalized = transcriptEl.querySelectorAll<HTMLElement>(
    ".transcript-entry:not(.interim)",
  );
  return Array.from(finalized);
}
|
|
/** Returns the non-interim entries from `lastSentIndex` onward. */
function getUnsentEntries(): HTMLElement[] {
  const finalized = getAllEntries();
  return finalized.slice(lastSentIndex);
}
|
|
/** Returns the plain-text rendering of every non-interim entry. */
function getAllTranscriptText(): string {
  const everything = getAllEntries();
  return formatEntries(everything);
}
|
|
/** Returns the plain-text rendering of the entries not yet sent. */
function getUnsentText(): string {
  const pending = getUnsentEntries();
  return formatEntries(pending);
}
|
|
/** Enables the send button only while there is at least one unsent entry. */
function updateSendButton() {
  const hasUnsent = getUnsentEntries().length > 0;
  sendBtn.disabled = !hasUnsent;
}
|
|
/**
 * Pushes the unsent transcript text to the host as model context.
 *
 * Does nothing unless the host capabilities include `updateModelContext`.
 * With no unsent text, an empty content list is sent. A failed update is
 * logged as a warning and not rethrown.
 */
function updateModelContext() {
  if (!app.getHostCapabilities()?.updateModelContext) return;

  const pending = getUnsentText();
  log.info("Updating model context:", pending || "(empty)");

  const onFailure = (e: unknown) => {
    log.warn("Failed to update model context:", e);
  };

  app
    .updateModelContext({
      content: pending
        ? [{ type: "text", text: `[Live transcript]: ${pending}` }]
        : [],
    })
    .catch(onFailure);
}
|
|
| |
| |
| |
|
|
/**
 * Starts a listening session: switches the UI to its recording state, starts
 * the timer, opens the microphone, then starts speech recognition.
 *
 * If the microphone or the recognizer cannot be started, an explanatory
 * entry is added to the transcript and the session is stopped. If the user
 * stops the session while the microphone request is still pending, any
 * audio resources acquired meanwhile are released and no error entry is
 * added.
 */
async function startListening() {
  isListening = true;
  startBtn.innerHTML = `
<svg class="btn-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<rect x="6" y="4" width="4" height="16"/>
<rect x="14" y="4" width="4" height="16"/>
</svg>
Stop
`;
  startBtn.classList.add("recording");
  levelBarEl.classList.add("active");
  startTimer();

  const micOk = await startAudioCapture();

  if (!isListening) {
    // Stopped during the await: this was a cancellation, not a denial.
    // Release whatever the capture attempt acquired and leave quietly.
    stopAudioCapture();
    return;
  }

  if (!micOk) {
    addTranscriptEntry("Microphone access denied", true);
    stopListening();
    return;
  }

  if (!startSpeechRecognition()) {
    addTranscriptEntry("Speech recognition not available", true);
    stopListening();
  }
}
|
|
/**
 * Ends the listening session: clears the listening flag, restores the Start
 * button and idle styling, stops the timer, then stops speech recognition
 * and audio capture.
 */
function stopListening() {
  const startMarkup = `
<svg class="btn-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<polygon points="5 3 19 12 5 21 5 3"/>
</svg>
Start
`;

  isListening = false;

  // UI back to idle.
  startBtn.innerHTML = startMarkup;
  startBtn.classList.remove("recording");
  levelBarEl.classList.remove("active");
  stopTimer();

  // Release the recognizer and the microphone.
  stopSpeechRecognition();
  stopAudioCapture();
}
|
|
// Start/Stop button: toggles the listening session.
startBtn.addEventListener("click", () => {
  if (!isListening) {
    // Fire-and-forget; the returned promise is intentionally not awaited.
    void startListening();
    return;
  }
  stopListening();
});
|
|
// Copy button: writes the full transcript text to the clipboard and shows
// the "copied" styling on the button for one second.
copyBtn.addEventListener("click", async () => {
  const fullText = getAllTranscriptText();
  if (fullText === "") return;

  try {
    await navigator.clipboard.writeText(fullText);

    const { classList } = copyBtn;
    classList.add("copied");
    setTimeout(() => classList.remove("copied"), 1000);
    log.info("Transcript copied to clipboard");
  } catch (err) {
    log.error("Failed to copy:", err);
  }
});
|
|
// Clear button: replaces the transcript with the placeholder, resets the
// sent marker, and refreshes the send button and model context.
clearBtn.addEventListener("click", () => {
  const placeholderMarkup =
    '<p class="transcript-placeholder">Your speech will appear here...</p>';

  transcriptEl.innerHTML = placeholderMarkup;
  lastSentIndex = 0;
  updateSendButton();
  updateModelContext();
});
|
|
/** True while a send is in flight; used to ignore repeated clicks. */
let isSending = false;

// Send button: sends the unsent transcript text as a user message. On
// success the sent entries are marked, a "sent" divider is placed after the
// last of them, and the sent marker advances past exactly those entries.
sendBtn.addEventListener("click", async () => {
  if (isSending) return;

  const unsentEntries = getUnsentEntries();
  if (unsentEntries.length === 0) return;

  const transcriptText = getUnsentText();
  if (!transcriptText) return;

  log.info("Sending transcript:", transcriptText);

  isSending = true;
  try {
    const { isError } = await app.sendMessage({
      role: "user",
      content: [{ type: "text", text: transcriptText }],
    });

    if (isError) {
      log.warn("Message was rejected");
      return;
    }
    log.info("Message sent successfully");

    unsentEntries.forEach((entry) => entry.classList.add("sent"));

    const lastEntry = unsentEntries[unsentEntries.length - 1];

    // Skip the divider if the transcript was cleared during the await and
    // the sent entries are no longer in the document.
    if (lastEntry.isConnected) {
      // Only one divider is shown: drop the previous one before adding.
      transcriptEl.querySelector(".sent-divider")?.remove();

      const divider = document.createElement("div");
      divider.className = "sent-divider";
      divider.innerHTML = `<span>sent ${new Date().toLocaleTimeString()}</span>`;
      lastEntry.insertAdjacentElement("afterend", divider);
    }

    // Advance only past what was actually sent. Entries finalised while the
    // request was in flight stay unsent. If lastEntry is gone, indexOf
    // yields -1 and the marker resets to 0.
    lastSentIndex = getAllEntries().indexOf(lastEntry) + 1;

    updateSendButton();
    updateModelContext();
  } catch (e) {
    log.error("Failed to send message:", e);
  } finally {
    isSending = false;
  }
});
|
|
| |
| |
| |
|
|
// Connect to the host, then apply the initial host context (if any) through
// the same handler used for later context changes. A failed connection is
// logged rather than left as an unhandled rejection.
app
  .connect()
  .then(() => {
    log.info("Connected to host");
    const ctx = app.getHostContext();
    if (ctx) {
      app.onhostcontextchanged?.(ctx);
    }
  })
  .catch((e: unknown) => {
    log.error("Failed to connect to host:", e);
  });
|
|