// seamless-streaming / streaming-react-app / src / StreamingInterface.tsx
// (Scrape residue from the Hugging Face file page: author "Anna Sun",
// commit "Clean up interface for HF, add instructions" 57f3b67, 40.5 kB.)
import {useCallback, useEffect, useLayoutEffect, useRef, useState} from 'react';
import Button from '@mui/material/Button';
import Typography from '@mui/material/Typography';
import InputLabel from '@mui/material/InputLabel';
import FormControl from '@mui/material/FormControl';
import Select, {SelectChangeEvent} from '@mui/material/Select';
import MenuItem from '@mui/material/MenuItem';
import Stack from '@mui/material/Stack';
import seamlessLogoUrl from './assets/seamless.svg';
import {
AgentCapabilities,
BaseResponse,
BrowserAudioStreamConfig,
DynamicConfig,
PartialDynamicConfig,
SUPPORTED_INPUT_SOURCES,
SUPPORTED_OUTPUT_MODES,
ServerExceptionData,
ServerSpeechData,
ServerState,
ServerTextData,
StartStreamEventConfig,
StreamingStatus,
SupportedInputSource,
SupportedOutputMode,
TranslationSentences,
} from './types/StreamingTypes';
import FormLabel from '@mui/material/FormLabel';
import RadioGroup from '@mui/material/RadioGroup';
import FormControlLabel from '@mui/material/FormControlLabel';
import Radio from '@mui/material/Radio';
import './StreamingInterface.css';
import RoomConfig from './RoomConfig';
import Divider from '@mui/material/Divider';
import {useSocket} from './useSocket';
import {RoomState} from './types/RoomState';
import useStable from './useStable';
import float32To16BitPCM from './float32To16BitPCM';
import createBufferedSpeechPlayer from './createBufferedSpeechPlayer';
import Checkbox from '@mui/material/Checkbox';
import Alert from '@mui/material/Alert';
import ISO6391 from 'iso-639-1';
import isScrolledToDocumentBottom from './isScrolledToDocumentBottom';
import Box from '@mui/material/Box';
import Slider from '@mui/material/Slider';
import VolumeDown from '@mui/icons-material/VolumeDown';
import VolumeUp from '@mui/icons-material/VolumeUp';
import Mic from '@mui/icons-material/Mic';
import MicOff from '@mui/icons-material/MicOff';
import XRDialog from './react-xr/XRDialog';
import getTranslationSentencesFromReceivedData from './getTranslationSentencesFromReceivedData';
import {
sliceTranslationSentencesUpToIndex,
getTotalSentencesLength,
} from './sliceTranslationSentencesUtils';
import Blink from './Blink';
import {CURSOR_BLINK_INTERVAL_MS} from './cursorBlinkInterval';
import {getURLParams} from './URLParams';
import debug from './debug';
import DebugSection from './DebugSection';
import {Grid} from '@mui/material';
// Default browser audio-processing flags per input source. Noise suppression
// is on for microphone capture ('userMedia') but off for screen/tab audio
// ('displayMedia'); echo cancellation is off for both (the UI below notes it
// can distort input audio while output audio is playing).
const AUDIO_STREAM_DEFAULTS: {
  [key in SupportedInputSource]: BrowserAudioStreamConfig;
} = {
  userMedia: {
    echoCancellation: false,
    noiseSuppression: true,
  },
  displayMedia: {
    echoCancellation: false,
    noiseSuppression: false,
  },
};
/**
 * Requests a mono microphone audio stream via `getUserMedia`.
 *
 * @param config - Browser audio-processing flags. The default mirrors
 *   `AUDIO_STREAM_DEFAULTS.userMedia` (noise suppression on, echo
 *   cancellation off).
 * @returns The captured `MediaStream` (channel count constrained to 1).
 * @throws A `DOMException` if the user denies permission or no capture
 *   device is available (propagated from `getUserMedia`).
 */
async function requestUserMediaAudioStream(
  config: BrowserAudioStreamConfig = {
    echoCancellation: false,
    noiseSuppression: true,
  },
): Promise<MediaStream> {
  const stream = await navigator.mediaDevices.getUserMedia({
    audio: {...config, channelCount: 1},
  });
  // Log the settings the browser actually applied — they may differ from
  // the requested constraints.
  console.debug(
    '[requestUserMediaAudioStream] stream created with settings:',
    stream.getAudioTracks()?.[0]?.getSettings(),
  );
  return stream;
}
/**
 * Requests a mono screen/tab-share audio stream via `getDisplayMedia`.
 *
 * @param config - Browser audio-processing flags. The default mirrors
 *   `AUDIO_STREAM_DEFAULTS.displayMedia` (noise suppression and echo
 *   cancellation both off).
 * @returns The captured `MediaStream` (channel count constrained to 1).
 * @throws A `DOMException` if the user cancels the share picker or the
 *   browser refuses the request (propagated from `getDisplayMedia`).
 */
async function requestDisplayMediaAudioStream(
  config: BrowserAudioStreamConfig = {
    echoCancellation: false,
    noiseSuppression: false,
  },
): Promise<MediaStream> {
  const stream = await navigator.mediaDevices.getDisplayMedia({
    audio: {...config, channelCount: 1},
  });
  // Log the settings the browser actually applied — they may differ from
  // the requested constraints.
  console.debug(
    '[requestDisplayMediaAudioStream] stream created with settings:',
    stream.getAudioTracks()?.[0]?.getSettings(),
  );
  return stream;
}
// Label shown on the main start/stop button for each streaming status.
const buttonLabelMap: {[key in StreamingStatus]: string} = {
  stopped: 'Start Streaming',
  running: 'Stop Streaming',
  starting: 'Starting...',
};
// Buffer limit passed to the server in the stream config.
const BUFFER_LIMIT = 1;

// How close (in px) to the document bottom still counts as "scrolled to
// bottom" for the auto-scroll behavior.
const SCROLLED_TO_BOTTOM_THRESHOLD_PX = 36;

// Above a slider value of 1 (= 100%) gain is amplified more aggressively so
// the top of the slider (3) maps to a 7x gain (the slider marks label this
// 0% / 100% / 400% / 700%).
const GAIN_MULTIPLIER_OVER_1 = 3;

/**
 * Maps the linear volume-slider value to the actual gain applied to the
 * speech player: identity up to 1, then a steeper linear ramp above 1.
 */
const getGainScaledValue = (value: number): number =>
  value > 1 ? (value - 1) * GAIN_MULTIPLIER_OVER_1 + 1 : value;

// Show a warning banner when the server reports at least this many active
// streaming sessions.
const TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD = 2;

// Cap on how many server exceptions we keep in client state.
const MAX_SERVER_EXCEPTIONS_TRACKED = 500;

// Delay between typing-animation steps when revealing translated text.
export const TYPING_ANIMATION_DELAY_MS = 6;
/**
 * Top-level UI for the Seamless streaming-translation demo.
 *
 * What this component does (all visible in the code below):
 * - Joins a room over a socket.io connection (`useSocket`) and tracks room
 *   and server state via `room_state_update` / `server_state_update` events.
 * - For speakers: captures microphone or screen-share audio, converts each
 *   buffer to 16-bit PCM, and emits it to the server as `incoming_audio`.
 * - Receives `translation_text` / `translation_speech` events, renders the
 *   transcript (optionally with a typing animation + blinking cursor), and
 *   plays received speech through a buffered speech player.
 * - Renders controls: model, target language, output mode, input source,
 *   browser noise-suppression/echo-cancellation toggles, mute, a listener
 *   volume slider, and an XR dialog.
 */
export default function StreamingInterface() {
  const urlParams = getURLParams();
  const debugParam = urlParams.debug;

  // The typing animation is suspended while the AR view is visible (see the
  // XRDialog onARVisible / onARHidden callbacks below).
  const [animateTextDisplay, setAnimateTextDisplay] = useState<boolean>(
    urlParams.animateTextDisplay,
  );

  const socketObject = useSocket();
  const {socket, clientID} = socketObject;

  const [serverState, setServerState] = useState<ServerState | null>(null);
  const [agent, setAgent] = useState<AgentCapabilities | null>(null);
  const model = agent?.name ?? null;
  const agentsCapabilities: Array<AgentCapabilities> =
    serverState?.agentsCapabilities ?? [];
  const currentAgent: AgentCapabilities | null =
    agentsCapabilities.find((agent) => agent.name === model) ?? null;

  const [serverExceptions, setServerExceptions] = useState<
    Array<ServerExceptionData>
  >([]);
  const [roomState, setRoomState] = useState<RoomState | null>(null);
  const roomID = roomState?.room_id ?? null;
  const isSpeaker =
    (clientID != null && roomState?.speakers.includes(clientID)) ?? false;
  const isListener =
    (clientID != null && roomState?.listeners.includes(clientID)) ?? false;

  const [streamingStatus, setStreamingStatus] =
    useState<StreamingStatus>('stopped');

  // Set by the configure_stream ack and read inside the onaudioprocess
  // callback — a ref so the audio callback always sees the current value.
  const isStreamConfiguredRef = useRef<boolean>(false);

  const [outputMode, setOutputMode] = useState<SupportedOutputMode>('s2s&t');
  const [inputSource, setInputSource] =
    useState<SupportedInputSource>('userMedia');
  // null means "fall back to AUDIO_STREAM_DEFAULTS for the selected source".
  const [enableNoiseSuppression, setEnableNoiseSuppression] = useState<
    boolean | null
  >(null);
  const [enableEchoCancellation, setEnableEchoCancellation] = useState<
    boolean | null
  >(null);

  // Dynamic Params:
  const [targetLang, setTargetLang] = useState<string | null>(null);

  const [serverDebugFlag, setServerDebugFlag] = useState<boolean>(
    debugParam ?? false,
  );

  const [receivedData, setReceivedData] = useState<Array<ServerTextData>>([]);
  const [
    translationSentencesAnimatedIndex,
    setTranslationSentencesAnimatedIndex,
  ] = useState<number>(0);

  const lastTranslationResultRef = useRef<HTMLDivElement | null>(null);

  const [inputStream, setInputStream] = useState<MediaStream | null>(null);
  const [inputStreamSource, setInputStreamSource] =
    useState<MediaStreamAudioSourceNode | null>(null);
  const audioContext = useStable<AudioContext>(() => new AudioContext());
  const [scriptNodeProcessor, setScriptNodeProcessor] =
    useState<ScriptProcessorNode | null>(null);

  const [muted, setMuted] = useState<boolean>(false);
  // The onaudioprocess script needs an up-to-date reference to the muted state, so
  // we use a ref here and keep it in sync via useEffect
  const mutedRef = useRef<boolean>(muted);
  useEffect(() => {
    mutedRef.current = muted;
  }, [muted]);

  // Unscaled slider value; the actual gain node receives
  // getGainScaledValue(gain) (see the volume slider onChange below).
  const [gain, setGain] = useState<number>(1);

  const isScrolledToBottomRef = useRef<boolean>(isScrolledToDocumentBottom());

  // Some config options must be set when starting streaming and cannot be changed dynamically.
  // This controls whether they are disabled or not
  const streamFixedConfigOptionsDisabled =
    streamingStatus !== 'stopped' || roomID == null;

  const bufferedSpeechPlayer = useStable(() => {
    const player = createBufferedSpeechPlayer({
      onStarted: () => {
        console.debug('📢 PLAYBACK STARTED 📢');
      },
      onEnded: () => {
        console.debug('🛑 PLAYBACK ENDED 🛑');
      },
    });

    // Start the player now so it eagerly plays audio when it arrives
    player.start();
    return player;
  });

  const translationSentencesBase: TranslationSentences =
    getTranslationSentencesFromReceivedData(receivedData);

  const translationSentencesBaseTotalLength = getTotalSentencesLength(
    translationSentencesBase,
  );

  // When animating, only reveal sentences up to the animated index; the
  // index is advanced one step at a time by the typing-animation effect.
  const translationSentences: TranslationSentences = animateTextDisplay
    ? sliceTranslationSentencesUpToIndex(
        translationSentencesBase,
        translationSentencesAnimatedIndex,
      )
    : translationSentencesBase;

  // We want the blinking cursor to show before any text has arrived, so let's add an empty string so that the cursor shows up
  const translationSentencesWithEmptyStartingString =
    streamingStatus === 'running' && translationSentences.length === 0
      ? ['']
      : translationSentences;

  /******************************************
   * Event Handlers
   ******************************************/

  // Switches the active agent; when the agent actually changes, resets the
  // target language to the new agent's first supported language.
  const setAgentAndUpdateParams = useCallback(
    (newAgent: AgentCapabilities | null) => {
      setAgent((prevAgent) => {
        if (prevAgent?.name !== newAgent?.name) {
          setTargetLang(newAgent?.targetLangs[0] ?? null);
        }
        return newAgent;
      });
    },
    [],
  );

  // Sends a partial dynamic-config update to the server; resolves/rejects
  // based on the ack status returned by the server.
  const onSetDynamicConfig = useCallback(
    async (partialConfig: PartialDynamicConfig) => {
      return new Promise<void>((resolve, reject) => {
        if (socket == null) {
          reject(new Error('[onSetDynamicConfig] socket is null '));
          return;
        }

        socket.emit(
          'set_dynamic_config',
          partialConfig,
          (result: BaseResponse) => {
            console.log('[emit result: set_dynamic_config]', result);
            if (result.status === 'ok') {
              resolve();
            } else {
              reject();
            }
          },
        );
      });
    },
    [socket],
  );

  // Emits the one-time stream configuration (model, sample rate, output
  // mode, ...) and records success in isStreamConfiguredRef.
  const configureStreamAsync = ({sampleRate}: {sampleRate: number}) => {
    return new Promise<void>((resolve, reject) => {
      if (socket == null) {
        reject(new Error('[configureStreamAsync] socket is null '));
        return;
      }
      const modelName = agent?.name ?? null;
      if (modelName == null) {
        reject(new Error('[configureStreamAsync] modelName is null '));
        return;
      }
      const config: StartStreamEventConfig = {
        event: 'config',
        rate: sampleRate,
        model_name: modelName,
        debug: serverDebugFlag,
        // synchronous processing isn't implemented on the v2 pubsub server, so hardcode this to true
        async_processing: true,
        buffer_limit: BUFFER_LIMIT,
        model_type: outputMode,
      };

      console.log('[configureStreamAsync] sending config', config);

      socket.emit('configure_stream', config, (statusObject) => {
        if (statusObject.status === 'ok') {
          isStreamConfiguredRef.current = true;
          console.debug(
            '[configureStreamAsync] stream configured!',
            statusObject,
          );
          resolve();
        } else {
          isStreamConfiguredRef.current = false;
          reject(
            new Error(
              `[configureStreamAsync] configure_stream returned status: ${statusObject.status}`,
            ),
          );
          return;
        }
      });
    });
  };

  // Acquires the audio input, wires up the (deprecated) ScriptProcessorNode
  // pipeline, pushes the dynamic config + stream config to the server, and
  // transitions stopped -> starting -> running (or back to stopped on error).
  const startStreaming = async () => {
    if (streamingStatus !== 'stopped') {
      console.warn(
        `Attempting to start stream when status is ${streamingStatus}`,
      );
      return;
    }

    setStreamingStatus('starting');

    // Browsers may create an AudioContext in the 'suspended' state until a
    // user gesture; resume it before wiring the audio graph.
    if (audioContext.state === 'suspended') {
      console.warn('audioContext was suspended! resuming...');
      await audioContext.resume();
    }

    let stream: MediaStream | null = null;

    try {
      if (inputSource === 'userMedia') {
        stream = await requestUserMediaAudioStream({
          noiseSuppression:
            enableNoiseSuppression ??
            AUDIO_STREAM_DEFAULTS['userMedia'].noiseSuppression,
          echoCancellation:
            enableEchoCancellation ??
            AUDIO_STREAM_DEFAULTS['userMedia'].echoCancellation,
        });
      } else if (inputSource === 'displayMedia') {
        stream = await requestDisplayMediaAudioStream({
          noiseSuppression:
            enableNoiseSuppression ??
            AUDIO_STREAM_DEFAULTS['displayMedia'].noiseSuppression,
          echoCancellation:
            enableEchoCancellation ??
            AUDIO_STREAM_DEFAULTS['displayMedia'].echoCancellation,
        });
      } else {
        throw new Error(`Unsupported input source requested: ${inputSource}`);
      }
      setInputStream(stream);
    } catch (e) {
      // e.g. the user denied the permission prompt or closed the picker.
      console.error('[startStreaming] media stream request failed:', e);
      setStreamingStatus('stopped');
      return;
    }

    const mediaStreamSource = audioContext.createMediaStreamSource(stream);
    setInputStreamSource(mediaStreamSource);
    /**
     * NOTE: This currently uses a deprecated way of processing the audio (createScriptProcessor), but
     * which is easy and convenient for our purposes.
     *
     * Documentation for the deprecated way of doing it is here: https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/createScriptProcessor
     *
     * In an ideal world this would be migrated to something like this SO answer: https://stackoverflow.com/a/65448287
     */
    const scriptProcessor = audioContext.createScriptProcessor(16384, 1, 1);
    setScriptNodeProcessor(scriptProcessor);

    scriptProcessor.onaudioprocess = (event) => {
      if (isStreamConfiguredRef.current === false) {
        console.debug('[onaudioprocess] stream is not configured yet!');
        return;
      }
      if (socket == null) {
        console.warn('[onaudioprocess] socket is null in onaudioprocess');
        return;
      }

      if (mutedRef.current) {
        // We still want to send audio to the server when we're muted to ensure we
        // get any remaining audio back from the server, so let's pass an array length 1 with a value of 0
        const mostlyEmptyInt16Array = new Int16Array(1);
        socket.emit('incoming_audio', mostlyEmptyInt16Array);
      } else {
        const float32Audio = event.inputBuffer.getChannelData(0);
        const pcm16Audio = float32To16BitPCM(float32Audio);
        socket.emit('incoming_audio', pcm16Audio);
      }

      debug()?.sentAudio(event);
    };

    mediaStreamSource.connect(scriptProcessor);
    scriptProcessor.connect(audioContext.destination);

    bufferedSpeechPlayer.start();

    try {
      if (targetLang == null) {
        throw new Error('[startStreaming] targetLang cannot be nullish');
      }

      // When we are starting the stream we want to pass all the dynamic config values
      // available before actually configuring and starting the stream
      const fullDynamicConfig: DynamicConfig = {
        targetLanguage: targetLang,
      };

      await onSetDynamicConfig(fullDynamicConfig);

      // NOTE: this needs to be the *audioContext* sample rate, not the sample rate of the input stream. Not entirely sure why.
      await configureStreamAsync({
        sampleRate: audioContext.sampleRate,
      });
    } catch (e) {
      console.error('configureStreamAsync failed', e);
      setStreamingStatus('stopped');
      return;
    }

    setStreamingStatus('running');
  };

  // Tears the audio pipeline down in the reverse order of startStreaming:
  // stop playback, disconnect the graph, release the mic, notify the server.
  const stopStreaming = useCallback(async () => {
    if (streamingStatus === 'stopped') {
      console.warn(
        `Attempting to stop stream when status is ${streamingStatus}`,
      );
      return;
    }

    // Stop the speech playback right away
    bufferedSpeechPlayer.stop();

    if (inputStreamSource == null || scriptNodeProcessor == null) {
      console.error(
        'inputStreamSource || scriptNodeProcessor is null in stopStreaming',
      );
    } else {
      inputStreamSource.disconnect(scriptNodeProcessor);
      scriptNodeProcessor.disconnect(audioContext.destination);

      // Release the mic input so we stop showing the red recording icon in the browser
      inputStream?.getTracks().forEach((track) => track.stop());
    }

    if (socket == null) {
      console.warn('Unable to emit stop_stream because socket is null');
    } else {
      socket.emit('stop_stream', (result) => {
        console.debug('[emit result: stop_stream]', result);
      });
    }

    setStreamingStatus('stopped');
  }, [
    audioContext.destination,
    bufferedSpeechPlayer,
    inputStream,
    inputStreamSource,
    scriptNodeProcessor,
    socket,
    streamingStatus,
  ]);

  // Asks the server to clear the transcript for every client in the room;
  // the local clear happens when the 'clear_transcript' event comes back.
  const onClearTranscriptForAll = useCallback(() => {
    if (socket != null) {
      socket.emit('clear_transcript_for_all');
    }
  }, [socket]);

  /******************************************
   * Effects
   ******************************************/

  // Subscribe to room membership/state updates.
  useEffect(() => {
    if (socket == null) {
      return;
    }

    const onRoomStateUpdate = (roomState: RoomState) => {
      setRoomState(roomState);
    };

    socket.on('room_state_update', onRoomStateUpdate);

    return () => {
      socket.off('room_state_update', onRoomStateUpdate);
    };
  }, [socket]);

  // Subscribe to translated text + speech coming back from the server.
  useEffect(() => {
    if (socket != null) {
      const onTranslationText = (data: ServerTextData) => {
        setReceivedData((prev) => [...prev, data]);
        debug()?.receivedText(data.payload);
      };

      const onTranslationSpeech = (data: ServerSpeechData) => {
        bufferedSpeechPlayer.addAudioToBuffer(data.payload, data.sample_rate);
      };

      socket.on('translation_text', onTranslationText);
      socket.on('translation_speech', onTranslationSpeech);

      return () => {
        socket.off('translation_text', onTranslationText);
        socket.off('translation_speech', onTranslationSpeech);
      };
    }
  }, [bufferedSpeechPlayer, socket]);

  // Subscribe to server state: handles the server lock (halting our stream
  // if another client holds an active lock) and picks an initial agent.
  useEffect(() => {
    if (socket != null) {
      const onServerStateUpdate = (newServerState: ServerState) => {
        setServerState(newServerState);

        // If a client creates a server lock, we want to stop streaming if we're not them
        if (
          newServerState.serverLock?.isActive === true &&
          newServerState.serverLock?.clientID !== clientID &&
          streamingStatus === 'running'
        ) {
          stopStreaming();
        }

        const firstAgentNullable = newServerState.agentsCapabilities[0];
        if (agent == null && firstAgentNullable != null) {
          setAgentAndUpdateParams(firstAgentNullable);
        }
      };

      socket.on('server_state_update', onServerStateUpdate);

      return () => {
        socket.off('server_state_update', onServerStateUpdate);
      };
    }
  }, [
    agent,
    clientID,
    setAgentAndUpdateParams,
    socket,
    stopStreaming,
    streamingStatus,
  ]);

  // Track server-side exceptions (newest first, capped at
  // MAX_SERVER_EXCEPTIONS_TRACKED) and mirror them to the console.
  useEffect(() => {
    if (socket != null) {
      const onServerException = (
        exceptionDataWithoutClientTime: ServerExceptionData,
      ) => {
        const exceptionData = {
          ...exceptionDataWithoutClientTime,
          timeStringClient: new Date(
            exceptionDataWithoutClientTime['timeEpochMs'],
          ).toLocaleString(),
        };

        setServerExceptions((prev) =>
          [exceptionData, ...prev].slice(0, MAX_SERVER_EXCEPTIONS_TRACKED),
        );
        console.error(
          `[server_exception] The server encountered an exception: ${exceptionData['message']}`,
          exceptionData,
        );
      };

      socket.on('server_exception', onServerException);

      return () => {
        socket.off('server_exception', onServerException);
      };
    }
  }, [socket]);

  // Clear the local transcript (and reset the typing animation) when the
  // server broadcasts a transcript clear.
  useEffect(() => {
    if (socket != null) {
      const onClearTranscript = () => {
        setReceivedData([]);
        setTranslationSentencesAnimatedIndex(0);
      };

      socket.on('clear_transcript', onClearTranscript);

      return () => {
        socket.off('clear_transcript', onClearTranscript);
      };
    }
  }, [socket]);

  // Track whether the user is scrolled (near) to the bottom, so auto-scroll
  // only kicks in when they haven't scrolled up to read earlier text.
  useEffect(() => {
    const onScroll = () => {
      if (isScrolledToDocumentBottom(SCROLLED_TO_BOTTOM_THRESHOLD_PX)) {
        isScrolledToBottomRef.current = true;
        return;
      }
      isScrolledToBottomRef.current = false;
      return;
    };

    document.addEventListener('scroll', onScroll);

    return () => {
      document.removeEventListener('scroll', onScroll);
    };
  }, []);

  useLayoutEffect(() => {
    if (
      lastTranslationResultRef.current != null &&
      isScrolledToBottomRef.current
    ) {
      // Scroll the div to the most recent entry
      lastTranslationResultRef.current.scrollIntoView();
    }
    // Run the effect every time data is received, so that
    // we scroll to the bottom even if we're just adding text to
    // a pre-existing chunk
  }, [receivedData]);

  // Typing animation: advance the revealed-sentence index one step every
  // TYPING_ANIMATION_DELAY_MS until all received text is shown.
  useEffect(() => {
    if (!animateTextDisplay) {
      return;
    }

    if (
      translationSentencesAnimatedIndex < translationSentencesBaseTotalLength
    ) {
      const timeout = setTimeout(() => {
        setTranslationSentencesAnimatedIndex((prev) => prev + 1);
        debug()?.startRenderText();
      }, TYPING_ANIMATION_DELAY_MS);

      return () => clearTimeout(timeout);
    } else {
      debug()?.endRenderText();
    }
  }, [
    animateTextDisplay,
    translationSentencesAnimatedIndex,
    translationSentencesBaseTotalLength,
  ]);

  /******************************************
   * Sub-components
   ******************************************/

  // Volume slider for listeners. React state keeps the unscaled value; the
  // speech player's gain node receives the scaled value.
  const volumeSliderNode = (
    <Stack
      spacing={2}
      direction="row"
      sx={{mb: 1, width: '100%'}}
      alignItems="center">
      <VolumeDown color="primary" />
      <Slider
        aria-label="Volume"
        defaultValue={1}
        scale={getGainScaledValue}
        min={0}
        max={3}
        step={0.1}
        marks={[
          {value: 0, label: '0%'},
          {value: 1, label: '100%'},
          {value: 2, label: '400%'},
          {value: 3, label: '700%'},
        ]}
        valueLabelFormat={(value) => `${(value * 100).toFixed(0)}%`}
        valueLabelDisplay="auto"
        value={gain}
        onChange={(_event: Event, newValue: number | number[]) => {
          if (typeof newValue === 'number') {
            const scaledGain = getGainScaledValue(newValue);
            // We want the actual gain node to use the scaled value
            bufferedSpeechPlayer.setGain(scaledGain);
            // But we want react state to keep track of the non-scaled value
            setGain(newValue);
          } else {
            console.error(
              `[volume slider] Unexpected non-number value: ${newValue}`,
            );
          }
        }}
      />
      <VolumeUp color="primary" />
    </Stack>
  );

  const xrDialogComponent = (
    <XRDialog
      animateTextDisplay={
        animateTextDisplay &&
        translationSentencesAnimatedIndex == translationSentencesBaseTotalLength
      }
      bufferedSpeechPlayer={bufferedSpeechPlayer}
      translationSentences={translationSentences}
      roomState={roomState}
      roomID={roomID}
      startStreaming={startStreaming}
      stopStreaming={stopStreaming}
      debugParam={debugParam}
      onARHidden={() => {
        setAnimateTextDisplay(urlParams.animateTextDisplay);
      }}
      onARVisible={() => setAnimateTextDisplay(false)}
    />
  );

  return (
    <div className="app-wrapper-sra">
      <Box
        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
        // @ts-ignore Not sure why it's complaining about complexity here
        sx={{width: '100%', maxWidth: '660px', minWidth: '320px'}}>
        <div className="main-container-sra">
          <div className="top-section-sra horizontal-padding-sra">
            <div className="header-container-sra">
              <img
                src={seamlessLogoUrl}
                className="header-icon-sra"
                alt="Seamless Translation Logo"
                height={24}
                width={24}
              />
              <div>
                <Typography variant="h1" sx={{color: '#65676B'}}>
                  Seamless Translation
                </Typography>
              </div>
              <div>
                <Typography variant="body2" sx={{color: '#65676B'}}>
                  Welcome! Join a room as speaker or listener (or both), and share the room code to invite listeners.
                </Typography>
              </div>
            </div>
            <Stack spacing="22px" direction="column">
              <Box>
                <RoomConfig
                  roomState={roomState}
                  serverState={serverState}
                  streamingStatus={streamingStatus}
                  onJoinRoomOrUpdateRoles={() => {
                    // If the user has switched from speaker to listener we need to tell the
                    // player to play eagerly, since currently the listener doesn't have any stop/start controls
                    bufferedSpeechPlayer.start();
                  }}
                />

                {isListener && !isSpeaker && (
                  <Box
                    sx={{
                      paddingX: 6,
                      paddingBottom: 2,
                      marginY: 2,
                      display: 'flex',
                      flexDirection: 'column',
                      alignItems: 'center',
                    }}>
                    {volumeSliderNode}
                  </Box>
                )}
              </Box>

              {/* Speaker-only controls: model, output, input source, options */}
              {isSpeaker && (
                <>
                  <Divider />

                  <Stack spacing="12px" direction="column">
                    <FormLabel id="output-modes-radio-group-label">
                      Model
                    </FormLabel>
                    <FormControl
                      disabled={
                        streamFixedConfigOptionsDisabled ||
                        agentsCapabilities.length === 0
                      }
                      fullWidth
                      sx={{minWidth: '14em'}}>
                      <InputLabel id="model-selector-input-label">
                        Model
                      </InputLabel>
                      <Select
                        labelId="model-selector-input-label"
                        label="Model"
                        onChange={(e: SelectChangeEvent) => {
                          const newAgent =
                            agentsCapabilities.find(
                              (agent) => e.target.value === agent.name,
                            ) ?? null;
                          if (newAgent == null) {
                            console.error(
                              'Unable to find agent with name',
                              e.target.value,
                            );
                          }
                          setAgentAndUpdateParams(newAgent);
                        }}
                        value={model ?? ''}>
                        {agentsCapabilities.map((agent) => (
                          <MenuItem value={agent.name} key={agent.name}>
                            {agent.name}
                          </MenuItem>
                        ))}
                      </Select>
                    </FormControl>
                  </Stack>

                  <Stack spacing={0.5}>
                    <FormLabel id="output-modes-radio-group-label">
                      Output
                    </FormLabel>

                    <Box sx={{paddingTop: 2, paddingBottom: 1}}>
                      <FormControl fullWidth sx={{minWidth: '14em'}}>
                        <InputLabel id="target-selector-input-label">
                          Target Language
                        </InputLabel>
                        <Select
                          labelId="target-selector-input-label"
                          label="Target Language"
                          onChange={(e: SelectChangeEvent) => {
                            setTargetLang(e.target.value);
                            // Target language can be changed mid-stream.
                            onSetDynamicConfig({
                              targetLanguage: e.target.value,
                            });
                          }}
                          value={targetLang ?? ''}>
                          {currentAgent?.targetLangs.map((langCode) => (
                            <MenuItem value={langCode} key={langCode}>
                              {`${ISO6391.getName(langCode)} (${langCode})`}
                            </MenuItem>
                          ))}
                        </Select>
                      </FormControl>
                    </Box>

                    <Grid container>
                      <Grid item xs={12} sm={4}>
                        <FormControl
                          disabled={streamFixedConfigOptionsDisabled}>
                          <RadioGroup
                            aria-labelledby="output-modes-radio-group-label"
                            value={outputMode}
                            onChange={(e) =>
                              setOutputMode(
                                e.target.value as SupportedOutputMode,
                              )
                            }
                            name="output-modes-radio-buttons-group">
                            {
                              // TODO: Use supported modalities from agentCapabilities
                              SUPPORTED_OUTPUT_MODES.map(({value, label}) => (
                                <FormControlLabel
                                  key={value}
                                  value={value}
                                  control={<Radio />}
                                  label={label}
                                />
                              ))
                            }
                          </RadioGroup>
                        </FormControl>
                      </Grid>
                      <Grid item xs={12} sm={8}>
                        <Stack
                          direction="column"
                          spacing={1}
                          alignItems="flex-start"
                          sx={{flexGrow: 1}}>
                          {isListener && (
                            <Box
                              sx={{
                                flexGrow: 1,
                                paddingX: 1.5,
                                paddingY: 1.5,
                                width: '100%',
                              }}>
                              {volumeSliderNode}
                            </Box>
                          )}
                        </Stack>
                      </Grid>
                    </Grid>
                  </Stack>

                  <Typography variant="body2">
                    Note: we don't recommend echo cancellation, as it may distort
                    the input audio (dropping words/sentences) if there is output
                    audio playing. Instead, you should use headphones if you'd like
                    to listen to the output audio while speaking.
                  </Typography>

                  <Stack
                    direction="row"
                    spacing={2}
                    justifyContent="space-between">
                    <Box sx={{flex: 1}}>
                      <FormControl disabled={streamFixedConfigOptionsDisabled}>
                        <FormLabel id="input-source-radio-group-label">
                          Input Source
                        </FormLabel>
                        <RadioGroup
                          aria-labelledby="input-source-radio-group-label"
                          value={inputSource}
                          onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
                            setInputSource(
                              e.target.value as SupportedInputSource,
                            )
                          }
                          name="input-source-radio-buttons-group">
                          {SUPPORTED_INPUT_SOURCES.map(({label, value}) => (
                            <FormControlLabel
                              key={value}
                              value={value}
                              control={<Radio />}
                              label={label}
                            />
                          ))}
                        </RadioGroup>
                      </FormControl>
                    </Box>
                    <Box sx={{flex: 1}}>
                      <FormControl disabled={streamFixedConfigOptionsDisabled}>
                        <FormLabel>Options</FormLabel>
                        <FormControlLabel
                          control={
                            <Checkbox
                              checked={
                                enableNoiseSuppression ??
                                AUDIO_STREAM_DEFAULTS[inputSource]
                                  .noiseSuppression
                              }
                              onChange={(
                                event: React.ChangeEvent<HTMLInputElement>,
                              ) =>
                                setEnableNoiseSuppression(event.target.checked)
                              }
                            />
                          }
                          label="Noise Suppression (Browser)"
                        />
                        <FormControlLabel
                          control={
                            <Checkbox
                              checked={
                                enableEchoCancellation ??
                                AUDIO_STREAM_DEFAULTS[inputSource]
                                  .echoCancellation
                              }
                              onChange={(
                                event: React.ChangeEvent<HTMLInputElement>,
                              ) =>
                                setEnableEchoCancellation(event.target.checked)
                              }
                            />
                          }
                          label="Echo Cancellation (Browser)"
                        />
                        <FormControlLabel
                          control={
                            <Checkbox
                              checked={serverDebugFlag}
                              onChange={(
                                event: React.ChangeEvent<HTMLInputElement>,
                              ) => setServerDebugFlag(event.target.checked)}
                            />
                          }
                          label="Server Debug Flag"
                        />
                      </FormControl>
                    </Box>
                  </Stack>

                  <Stack direction="row" spacing={2}>
                    {streamingStatus === 'stopped' ? (
                      <Button
                        variant="contained"
                        onClick={startStreaming}
                        disabled={
                          roomID == null ||
                          // Prevent users from starting streaming if there is a server lock with an active session
                          (serverState?.serverLock?.isActive === true &&
                            serverState.serverLock.clientID !== clientID)
                        }>
                        {buttonLabelMap[streamingStatus]}
                      </Button>
                    ) : (
                      <Button
                        variant="contained"
                        color={
                          streamingStatus === 'running' ? 'error' : 'primary'
                        }
                        disabled={
                          streamingStatus === 'starting' || roomID == null
                        }
                        onClick={stopStreaming}>
                        {buttonLabelMap[streamingStatus]}
                      </Button>
                    )}

                    <Box>
                      <Button
                        variant="contained"
                        aria-label={muted ? 'Unmute' : 'Mute'}
                        color={muted ? 'info' : 'primary'}
                        onClick={() => setMuted((prev) => !prev)}
                        sx={{
                          borderRadius: 100,
                          paddingX: 0,
                          minWidth: '36px',
                        }}>
                        {muted ? <MicOff /> : <Mic />}
                      </Button>
                    </Box>

                    {roomID == null ? null : (
                      <Box
                        sx={{
                          flexGrow: 1,
                          display: 'flex',
                          justifyContent: 'flex-end',
                        }}>
                        {xrDialogComponent}
                      </Box>
                    )}
                  </Stack>

                  {serverExceptions.length > 0 && (
                    <div>
                      <Alert severity="error">
                        {`The server encountered an exception. See the browser console for details. You may need to refresh the page to continue using the app.`}
                      </Alert>
                    </div>
                  )}
                  {serverState != null &&
                    serverState.totalActiveTranscoders >=
                      TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD && (
                      <div>
                        <Alert severity="warning">
                          {`The server currently has ${serverState?.totalActiveTranscoders} active streaming sessions. Performance may be degraded.`}
                        </Alert>
                      </div>
                    )}
                  {serverState?.serverLock != null &&
                    serverState.serverLock.clientID !== clientID && (
                      <div>
                        <Alert severity="warning">
                          {`The server is currently locked by "${serverState.serverLock.name}". Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`}
                        </Alert>
                      </div>
                    )}
                </>
              )}
            </Stack>

            {isListener && !isSpeaker && (
              <Box sx={{marginBottom: 1, marginTop: 2}}>
                {xrDialogComponent}
              </Box>
            )}
          </div>

          {debugParam && roomID != null && <DebugSection />}

          <div className="translation-text-container-sra horizontal-padding-sra">
            <Stack
              direction="row"
              spacing={2}
              sx={{mb: '16px', alignItems: 'center'}}>
              <Typography variant="h1" sx={{fontWeight: 700, flexGrow: 1}}>
                Transcript
              </Typography>
              {isSpeaker && (
                <Button
                  variant="text"
                  size="small"
                  onClick={onClearTranscriptForAll}>
                  Clear Transcript for All
                </Button>
              )}
            </Stack>
            <Stack direction="row">
              <div className="translation-text-sra">
                {translationSentencesWithEmptyStartingString.map(
                  (sentence, index, arr) => {
                    const isLast = index === arr.length - 1;
                    // Only the last chunk gets the ref used for auto-scroll.
                    const maybeRef = isLast
                      ? {ref: lastTranslationResultRef}
                      : {};
                    return (
                      <div className="text-chunk-sra" key={index} {...maybeRef}>
                        <Typography variant="body1">
                          {sentence}
                          {animateTextDisplay && isLast && (
                            <Blink
                              intervalMs={CURSOR_BLINK_INTERVAL_MS}
                              shouldBlink={
                                (roomState?.activeTranscoders ?? 0) > 0
                              }>
                              <Typography
                                component="span"
                                variant="body1"
                                sx={{
                                  display: 'inline-block',
                                  transform: 'scaleY(1.25) translateY(-1px)',
                                }}>
                                {'|'}
                              </Typography>
                            </Blink>
                          )}
                        </Typography>
                      </div>
                    );
                  },
                )}
              </div>
            </Stack>
          </div>
        </div>
      </Box>
    </div>
  );
}