Spaces:

tttarun
/

shuka_testing

Running

App Files Files Community

tttarun committed 10 days ago

Commit

1fcf11f

verified ·

1 Parent(s): b64cf80

Create app.py

Browse files

Files changed (1) hide show

app.py +54 -0

app.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import transformers
+import gradio as gr
+import librosa
+import torch
+import spaces
+import numpy as np
+@spaces.GPU(duration=60)
+def transcribe_and_respond(audio_file):
+    try:
+        pipe = transformers.pipeline(
+            model='sarvamai/shuka_v1',
+            trust_remote_code=True,
+            device=0,
+            torch_dtype=torch.bfloat16
+        )
+        # Load the audio file
+        audio, sr = librosa.load(audio_file, sr=16000)
+        # Print audio properties for debugging
+        print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")
+        turns = [
+            {'role': 'system', 'content': 'Respond naturally and informatively.'},
+            {'role': 'user', 'content': '<|audio|>'}
+        ]
+        # Debug: Print the initial turns
+        print(f"Initial turns: {turns}")
+        # Call the model with the audio and prompt
+        output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
+        # Debug: Print the final output from the model
+        print(f"Model output: {output}")
+        return output
+    except Exception as e:
+        return f"Error: {str(e)}"
+iface = gr.Interface(
+    fn=transcribe_and_respond,
+    inputs=gr.Audio(type="filepath"),
+    outputs="text",
+    title="Live Transcription and Response",
+    description="Speak into your microphone, and the model will respond naturally and informatively.",
+    live=True
+)
+if __name__ == "__main__":
+    iface.launch()