tttarun committed
Commit 1fcf11f · verified · 1 Parent(s): b64cf80

Create app.py

Files changed (1):
  app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
import transformers
import gradio as gr
import librosa
import torch
import spaces
import numpy as np

@spaces.GPU(duration=60)
def transcribe_and_respond(audio_file):
    try:
        # Build the pipeline inside the handler so the model loads within the
        # ZeroGPU-allocated context; note this reloads the model on every call.
        pipe = transformers.pipeline(
            model='sarvamai/shuka_v1',
            trust_remote_code=True,
            device=0,
            torch_dtype=torch.bfloat16
        )

        # Load the audio file, resampled to the 16 kHz rate the model expects
        audio, sr = librosa.load(audio_file, sr=16000)

        # Print audio properties for debugging
        print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

        turns = [
            {'role': 'system', 'content': 'Respond naturally and informatively.'},
            {'role': 'user', 'content': '<|audio|>'}
        ]

        # Debug: print the initial turns
        print(f"Initial turns: {turns}")

        # Call the model with the audio and prompt
        output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)

        # Debug: print the final output from the model
        print(f"Model output: {output}")

        return output

    except Exception as e:
        return f"Error: {str(e)}"


iface = gr.Interface(
    fn=transcribe_and_respond,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Live Transcription and Response",
    description="Speak into your microphone, and the model will respond naturally and informatively.",
    live=True
)

if __name__ == "__main__":
    iface.launch()
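For a quick smoke test outside the Space, the handler can also be invoked directly; a minimal sketch, assuming app.py is importable, a recording exists at the hypothetical path sample.wav, and a CUDA device is available (the handler pins the pipeline to device=0; outside a ZeroGPU Space the @spaces.GPU decorator is expected to act as a pass-through):

# local_test.py -- hypothetical smoke test, not part of this commit
from app import transcribe_and_respond

# Run the full load-transcribe-respond path on one file and print the result.
print(transcribe_and_respond("sample.wav"))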