Vignesh Varadarajan committed on
Commit
0fd7f51
·
1 Parent(s): 6d77a99

Add gradio app

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.wav filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from pathlib import Path
4
+ from vogent_turn.inference import TurnDetector
5
+ import soundfile as sf
6
+ import numpy as np
7
+
8
+
9
def get_detector():
    """Construct the turn detector used by the app.

    Model compilation and warmup are both switched off when the
    ``TurnDetector`` is created.
    """
    return TurnDetector(compile_model=False, warmup=False)


# Build the shared detector once at module load; run_inference reuses it.
detector = get_detector()
16
+
17
# List the preset names available under the samples folder
def get_presets():
    """Return the sorted names of the sub-directories of ``samples``.

    Each sub-directory is treated as one preset. An empty list is returned
    when ``samples`` is missing or is not a directory.
    """
    samples_dir = Path("samples")
    # is_dir() (rather than exists()) also covers a stray *file* named
    # "samples", on which iterdir() would raise NotADirectoryError.
    if not samples_dir.is_dir():
        return []
    return sorted(entry.name for entry in samples_dir.iterdir() if entry.is_dir())
24
+
25
def _read_preset_text(path):
    """Return the stripped UTF-8 contents of ``path``, or "" if it is not a file."""
    if not path.is_file():
        return ""
    # Explicit encoding: the platform default is not UTF-8 everywhere, and
    # the transcripts may contain non-ASCII characters.
    return path.read_text(encoding="utf-8").strip()


# Load the audio path and transcript text for one preset
def load_preset(preset_name):
    """Load the audio path and text files of the selected preset.

    Args:
        preset_name: Name of a sub-directory of ``samples``. A falsy value
            (``None`` or ``""``) means "no preset selected".

    Returns:
        A ``(audio_file, prev_text, curr_text)`` tuple. ``audio_file`` is the
        path to ``audio.wav`` as a string, or ``None`` when that file is
        absent. ``prev_text`` and ``curr_text`` are the stripped contents of
        ``prev.txt`` and ``text.txt``; each is ``""`` when its file is absent.
    """
    if not preset_name:
        return None, "", ""

    preset_dir = Path("samples") / preset_name

    audio_path = preset_dir / "audio.wav"
    audio_file = str(audio_path) if audio_path.is_file() else None

    prev_text = _read_preset_text(preset_dir / "prev.txt")
    curr_text = _read_preset_text(preset_dir / "text.txt")
    return audio_file, prev_text, curr_text
50
+
51
def _to_mono_float32(audio):
    """Average a multi-channel array down to one channel and cast to float32."""
    if audio.ndim > 1:
        # NOTE(review): assumes the (frames, channels) layout that
        # soundfile.read documents for multi-channel files — confirm.
        audio = audio.mean(axis=1)
    return audio.astype(np.float32)


def _context_or_none(text):
    """Return ``text`` unchanged, or None when it is empty or only whitespace."""
    return text if text and text.strip() else None


def _format_result(result):
    """Render the detector's result mapping as the Markdown summary for the UI."""
    verdict = (
        "Turn Complete (Endpoint)"
        if result["is_endpoint"]
        else "Turn Incomplete (Continue)"
    )
    # Built line by line so the Markdown never depends on source indentation.
    return (
        "\n"
        "**Turn Detection Result:**\n"
        "\n"
        f"- **Prediction:** {verdict}\n"
        f"- **Probability of Endpoint:** {result['prob_endpoint']:.4f}\n"
        f"- **Probability of Continue:** {result['prob_continue']:.4f}\n"
    )


# Run inference
def run_inference(audio_file, prev_text, curr_text):
    """Run turn detection on an audio file with optional text context.

    Args:
        audio_file: Path of the audio file to analyse. ``None`` or an empty
            string is reported as missing audio.
        prev_text: Text of the previous line; passed to the detector as
            ``None`` when empty or whitespace-only.
        curr_text: Text of the current line; passed to the detector as
            ``None`` when empty or whitespace-only.

    Returns:
        A Markdown string with the prediction and both probabilities, or a
        string starting with ``"Error"`` when no audio was given or when
        loading/prediction raised.
    """
    if not audio_file:
        return "Error: No audio file provided"

    # This is the UI boundary: any failure is shown to the user as text
    # instead of propagating out of the click handler.
    try:
        audio, sr = sf.read(audio_file)
        result = detector.predict(
            _to_mono_float32(audio),
            prev_line=_context_or_none(prev_text),
            curr_line=_context_or_none(curr_text),
            return_probs=True,
            sample_rate=sr,
        )
        return _format_result(result)
    except Exception as e:
        return f"Error during inference: {e}"
93
+
94
# Pick the first preset (if any) and preload its data for the initial UI state
presets = get_presets()
if presets:
    default_preset = presets[0]
else:
    default_preset = None

if default_preset:
    default_audio, default_prev_text, default_curr_text = load_preset(default_preset)
else:
    # No preset available: start with no audio and empty text fields.
    default_audio, default_prev_text, default_curr_text = None, "", ""
98
+
99
# Build the Gradio UI
with gr.Blocks(title="Turn Detection Dashboard") as demo:
    gr.Markdown("# Turn Detection Dashboard")
    gr.Markdown("Multimodal turn detection using audio and text context")

    with gr.Row():
        # First column: preset picker, the three inputs, and the run button.
        with gr.Column():
            preset_dropdown = gr.Dropdown(
                label="Preset Samples",
                info="Select a preset to auto-fill the fields",
                choices=presets,
                value=default_preset,
            )

            prev_text_input = gr.Textbox(
                label="Previous Line Text",
                placeholder="Enter the previous line of dialog...",
                value=default_prev_text,
                lines=2,
            )

            curr_text_input = gr.Textbox(
                label="Current Line Text",
                placeholder="Enter the current line being spoken...",
                value=default_curr_text,
                lines=2,
            )

            audio_input = gr.Audio(
                label="Current Line Audio",
                value=default_audio,
                type="filepath",
            )

            inference_btn = gr.Button("Run Inference", variant="primary")

        # Second column: where the inference result is rendered.
        with gr.Column():
            output_text = gr.Markdown(label="Results")

    # Choosing a preset refills the audio and both text fields.
    preset_dropdown.change(
        fn=load_preset,
        inputs=[preset_dropdown],
        outputs=[audio_input, prev_text_input, curr_text_input],
    )

    # The button feeds the current inputs to run_inference and shows its output.
    inference_btn.click(
        fn=run_inference,
        inputs=[audio_input, prev_text_input, curr_text_input],
        outputs=[output_text],
    )

# Only start the server when executed as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ vogent-turn==0.1.0
2
+ gradio==5.49.1
samples/complete_1/audio.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75e61f9505f8617707ea587edd06408002c34385c099073ed1d9094bf43e4dde
3
+ size 203480
samples/complete_1/prev.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ what is your phone number
samples/complete_1/text.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ my number is 8042221111
samples/incomplete_1/audio.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bc047d2fc4e84a4330453ac230e28a9a6c2601765baec6c0618f78bc75f1e0b
3
+ size 148184
samples/incomplete_1/prev.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ what is your phone number
samples/incomplete_1/text.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ my number is 804