Spaces:

vogent
/

vogent-turn-demo

Running

App Files Files Community

Vignesh Varadarajan committed on Oct 20

Commit

0fd7f51

1 Parent(s): 6d77a99

Add gradio app

Browse files

Files changed (9) hide show

.gitattributes +1 -0
app.py +157 -0
requirements.txt +2 -0
samples/complete_1/audio.wav +3 -0
samples/complete_1/prev.txt +1 -0
samples/complete_1/text.txt +1 -0
samples/incomplete_1/audio.wav +3 -0
samples/incomplete_1/prev.txt +1 -0
samples/incomplete_1/text.txt +1 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,157 @@

+import gradio as gr
+import os
+from pathlib import Path
+from vogent_turn.inference import TurnDetector
+import soundfile as sf
+import numpy as np
+def get_detector():
+    """Build and return a new ``TurnDetector``.
+
+    The detector is created with ``compile_model=False`` and
+    ``warmup=False``. Nothing is cached here: each call constructs a
+    separate instance.
+    """
+    return TurnDetector(compile_model=False, warmup=False)
+# Initialize the turn detector once, at module import time; run_inference()
+# reads this module-level instance for every request.
+detector = get_detector()
+# Get all preset names from samples folder
+def get_presets():
+    """Return the sorted names of the sub-directories of ``samples``.
+
+    ``samples`` is looked up relative to the current working directory.
+    An empty list is returned when that path does not exist; plain files
+    inside it are ignored.
+    """
+    root = Path("samples")
+    if root.exists():
+        names = []
+        for entry in root.iterdir():
+            if entry.is_dir():
+                names.append(entry.name)
+        names.sort()
+        return names
+    return []
+# Load preset data
+def load_preset(preset_name):
+    """Load the audio path and text context stored for a preset.
+
+    Args:
+        preset_name: Name of a sub-directory of ``samples``. A falsy value
+            (``None`` or ``""``) means "no preset selected".
+
+    Returns:
+        A ``(audio_file, prev_text, curr_text)`` tuple. ``audio_file`` is
+        the path to ``audio.wav`` as a string, or ``None`` when that file
+        is missing. ``prev_text`` and ``curr_text`` are the stripped
+        contents of ``prev.txt`` and ``text.txt``, or ``""`` when the
+        corresponding file is missing.
+    """
+    if not preset_name:
+        return None, "", ""
+    preset_dir = Path("samples") / preset_name
+    def read_optional(filename):
+        """Return the stripped text of a preset file, or "" if absent."""
+        path = preset_dir / filename
+        if not path.exists():
+            return ""
+        # Decode explicitly as UTF-8: the platform default encoding differs
+        # between hosts and can garble or reject non-ASCII transcripts.
+        return path.read_text(encoding="utf-8").strip()
+    audio_path = preset_dir / "audio.wav"
+    audio_file = str(audio_path) if audio_path.exists() else None
+    return audio_file, read_optional("prev.txt"), read_optional("text.txt")
+# Run inference
+def run_inference(audio_file, prev_text, curr_text):
+    """Run turn detection on an audio file and format the result.
+
+    Args:
+        audio_file: Path of the audio file to analyse, or ``None``.
+        prev_text: Text of the previous line; an empty value is forwarded
+            to the detector as ``None``.
+        curr_text: Text of the current line; an empty value is forwarded
+            to the detector as ``None``.
+
+    Returns:
+        A Markdown string with the prediction and both probabilities, or
+        an ``"Error..."`` message when no audio was given or any exception
+        was raised while loading or predicting.
+    """
+    if audio_file is None:
+        return "Error: No audio file provided"
+    try:
+        samples, rate = sf.read(audio_file)
+        if samples.ndim > 1:
+            # Multi-channel input: average across axis 1 to get mono.
+            samples = samples.mean(axis=1)
+        samples = samples.astype(np.float32)
+        scores = detector.predict(
+            samples,
+            prev_line=prev_text or None,
+            curr_line=curr_text or None,
+            return_probs=True,
+            sample_rate=rate,
+        )
+        if scores['is_endpoint']:
+            verdict = 'Turn Complete (Endpoint)'
+        else:
+            verdict = 'Turn Incomplete (Continue)'
+        p_end = scores['prob_endpoint']
+        p_cont = scores['prob_continue']
+        return f"""
+**Turn Detection Result:**
+- **Prediction:** {verdict}
+- **Probability of Endpoint:** {p_end:.4f}
+- **Probability of Continue:** {p_cont:.4f}
+"""
+    except Exception as exc:
+        # UI boundary: report the failure as text instead of raising.
+        return f"Error during inference: {exc!s}"
+# Get default preset and load its data
+# The first preset in sorted order pre-fills the form; when there are no
+# presets the fields start empty.
+presets = get_presets()
+default_preset = presets[0] if presets else None
+# NOTE(review): load_preset() already returns (None, "", "") for a falsy
+# name, so the conditional fallback below duplicates that guard.
+default_audio, default_prev_text, default_curr_text = load_preset(default_preset) if default_preset else (None, "", "")
+# Create Gradio interface
+# Layout: a single row with two columns -- the first holds the inputs and
+# the button, the second holds the result.
+with gr.Blocks(title="Turn Detection Dashboard") as demo:
+    gr.Markdown("# Turn Detection Dashboard")
+    gr.Markdown("Multimodal turn detection using audio and text context")
+    with gr.Row():
+        with gr.Column():
+            # Preset selector
+            preset_dropdown = gr.Dropdown(
+                choices=presets,
+                label="Preset Samples",
+                info="Select a preset to auto-fill the fields",
+                value=default_preset
+            )
+            # Input fields
+            prev_text_input = gr.Textbox(
+                label="Previous Line Text",
+                placeholder="Enter the previous line of dialog...",
+                lines=2,
+                value=default_prev_text
+            )
+            curr_text_input = gr.Textbox(
+                label="Current Line Text",
+                placeholder="Enter the current line being spoken...",
+                lines=2,
+                value=default_curr_text
+            )
+            # type="filepath" hands run_inference() a path on disk, which
+            # it opens with sf.read().
+            audio_input = gr.Audio(
+                label="Current Line Audio",
+                type="filepath",
+                value=default_audio
+            )
+            # Inference button
+            inference_btn = gr.Button("Run Inference", variant="primary")
+        with gr.Column():
+            # Output
+            output_text = gr.Markdown(label="Results")
+    # Connect preset dropdown to load function
+    # The outputs are listed in the same order as load_preset()'s return
+    # tuple: (audio_file, prev_text, curr_text).
+    preset_dropdown.change(
+        fn=load_preset,
+        inputs=[preset_dropdown],
+        outputs=[audio_input, prev_text_input, curr_text_input]
+    )
+    # Connect inference button
+    # The inputs are listed in the same order as run_inference()'s
+    # parameters; the returned Markdown string is shown in output_text.
+    inference_btn.click(
+        fn=run_inference,
+        inputs=[audio_input, prev_text_input, curr_text_input],
+        outputs=[output_text]
+    )
+# Start the app only when this file is run as a script, not on import.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ vogent-turn==0.1.0
2	+ gradio==5.49.1

samples/complete_1/audio.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75e61f9505f8617707ea587edd06408002c34385c099073ed1d9094bf43e4dde
+size 203480

samples/complete_1/prev.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ what is your phone number

samples/complete_1/text.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ my number is 8042221111

samples/incomplete_1/audio.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc047d2fc4e84a4330453ac230e28a9a6c2601765baec6c0618f78bc75f1e0b
+size 148184

samples/incomplete_1/prev.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ what is your phone number

samples/incomplete_1/text.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ my number is 804