Spaces:

nvidia
/

voice-agent-examples

Running

App Files Files Community

voice-agent-examples / examples /voice_agent_webrtc_langgraph /websocket_transcript_output.py

fciannella

Working with service run on 7860

53ea588 about 1 month ago

raw

history blame

3.2 kB

	# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
	# SPDX-License-Identifier: BSD 2-Clause License

	"""Websocket transcript output for the voice agent webrtc demo."""

	import uuid

	from fastapi import WebSocket
	from pipecat.frames.frames import BotStoppedSpeakingFrame, Frame
	from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
	from pipecat.processors.metrics.frame_processor_metrics import FrameProcessorMetrics
	from pydantic import BaseModel

	from nvidia_pipecat.frames.transcripts import (
	BotUpdatedSpeakingTranscriptFrame,
	UserStoppedSpeakingTranscriptFrame,
	UserUpdatedSpeakingTranscriptFrame,
	)


	class Transcript(BaseModel):
	    """JSON message describing one transcript update sent over the websocket."""

	    # Transcript text carried by this message.
	    text: str
	    # Who produced the text; this module sends "bot" or "user".
	    actor: str
	    # Identifier of the message; this module passes the string form of a UUID4.
	    message_id: str


	class WebsocketTranscriptOutput(FrameProcessor):
	    """Frame processor that mirrors bot and user transcripts to a websocket.

	    Transcript frames are serialized as ``Transcript`` JSON messages and sent
	    with ``send_text``. Bot transcript updates are appended to a running text
	    that is cleared on ``BotStoppedSpeakingFrame``; user transcripts are sent
	    as received. Every frame is pushed onward in its original direction.
	    """

	    def __init__(
	        self,
	        ws: WebSocket,
	        name: str | None = None,
	        metrics: FrameProcessorMetrics | None = None,
	        **kwargs,
	    ):
	        """Initialize the frame processor.

	        Args:
	            ws (WebSocket): The websocket to send the transcripts to.
	            name (str): The name of the frame processor.
	            metrics (FrameProcessorMetrics): The metrics for the frame processor.
	            kwargs (dict): Additional keyword arguments forwarded to ``FrameProcessor``.
	        """
	        super().__init__(name=name, metrics=metrics, **kwargs)
	        self._ws = ws
	        # Separate ids per actor; each one is replaced when that actor stops speaking.
	        self._message_id_user = uuid.uuid4()
	        self._message_id_bot = uuid.uuid4()
	        # Running text of the bot message currently being spoken.
	        self._last_bot_transcript = ""

	    async def _send_transcript(self, text: str, actor: str, message_id: uuid.UUID) -> None:
	        """Send one ``Transcript`` JSON message; do nothing if no websocket is set."""
	        if self._ws is None:
	            return
	        message = Transcript(text=text, actor=actor, message_id=str(message_id))
	        await self._ws.send_text(message.model_dump_json())

	    async def process_frame(self, frame: Frame, direction: FrameDirection):
	        """Process the frame and send the transcript to the websocket.

	        Args:
	            frame (Frame): The frame to inspect.
	            direction (FrameDirection): The direction the frame is travelling in.
	        """
	        await super().process_frame(frame, direction)
	        if isinstance(frame, BotUpdatedSpeakingTranscriptFrame):
	            # NOTE(review): sending only when the transcript changed is the
	            # reconstructed nesting of the original code - confirm.
	            if self._last_bot_transcript != frame.transcript:
	                # NOTE(review): the running text starts empty, so the first
	                # update yields a leading space; kept as-is for the client.
	                self._last_bot_transcript += " " + frame.transcript
	                await self._send_transcript(self._last_bot_transcript, "bot", self._message_id_bot)
	        elif isinstance(frame, BotStoppedSpeakingFrame):
	            # Start a fresh bot message for the next utterance.
	            self._message_id_bot = uuid.uuid4()
	            self._last_bot_transcript = ""
	        elif isinstance(frame, UserUpdatedSpeakingTranscriptFrame):
	            await self._send_transcript(frame.transcript, "user", self._message_id_user)
	        elif isinstance(frame, UserStoppedSpeakingTranscriptFrame):
	            await self._send_transcript(frame.transcript, "user", self._message_id_user)
	            # NOTE(review): reset placed outside the websocket guard, mirroring
	            # the bot branch - confirm. Later user updates use the new id.
	            self._message_id_user = uuid.uuid4()
	        # Always forward the frame, whether or not it was handled above.
	        await super().push_frame(frame, direction)