Firefly777a committed
Commit 6a2808e
1 Parent(s): fea335b

Changed the way the ASR model is being called. (whisper)
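For context, the old code drove Whisper's low-level pipeline by hand, while the new code relies on the single high-level transcribe() call. A minimal sketch of the two styles (the audio path and model size here are illustrative, not taken from the repo):

import whisper

model = whisper.load_model("tiny")

# Before: manual pipeline (pads or trims the input to exactly 30 seconds)
audio = whisper.pad_or_trim(whisper.load_audio("sample.wav"))
mel = whisper.log_mel_spectrogram(audio).to(model.device)
result = whisper.decode(model, mel, whisper.DecodingOptions(fp16=False))
print(result.text)

# After: transcribe() loads the file itself and slides a 30-second
# window over it, then returns a dict whose "text" key holds the transcript
print(model.transcribe("sample.wav")["text"])

One practical consequence: pad_or_trim() cuts audio to a fixed 30 seconds, so the old path silently dropped anything longer, whereas transcribe() handles recordings of arbitrary length.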

Files changed (1): app.py +14 -18
app.py CHANGED
@@ -37,29 +37,25 @@ Transcript: I need to buy a birthday
 Prediction: Present, Gift, Cake, Card
 Transcript: """
 
-# whisper model specification
-model = whisper.load_model("tiny")
+# whisper model specification
+asr_model = whisper.load_model("tiny")
 
 # openai.api_key = os.environ["Openai_APIkey"]
 
-def debug_inference(audio, prompt, model, temperature, state=""):
-    # load audio data
-    audio = whisper.load_audio(audio)
-    # ensure sample is in correct format for inference
-    audio = whisper.pad_or_trim(audio)
-
-    # generate a log-mel spetrogram of the audio data
-    mel = whisper.log_mel_spectrogram(audio)
-
-    _, probs = model.detect_language(mel)
-
-    # decode audio data
-    options = whisper.DecodingOptions(fp16 = False)
-    # transcribe speech to text
-    result = whisper.decode(model, mel, options)
-    print("result pre gp model from whisper: ", result, ".text ", result.text, "and the data type: ", type(result.text))
-
-    text = prompt + result.text + "\nPrediction: "
+# Transcribe function
+def transcribe(audio_file):
+    print("Transcribing")
+    transcription = asr_model.transcribe(audio_file)["text"]
+    return transcription
+
+def debug_inference(audio, prompt, model, temperature, state=""):
+    # Transcribe with Whisper
+    print("The audio is:", audio)
+    transcript = transcribe(audio)
+
+    text = prompt + transcript + "\nPrediction: "
 
     response = openai.Completion.create(
         model=model,
@@ -82,7 +78,7 @@ def debug_inference(audio, prompt, model, temperature, state=""):
     infers = list(map(lambda x: x.replace("\n", ""), temp))
     #infered = list(map(lambda x: x.split(','), infers))
 
-    return result.text, state, infers, text
+    return transcript, state, infers, text
 
 # get audio from microphone
 gr.Interface(
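The gr.Interface(...) call is cut off in the diff. A hypothetical wiring consistent with debug_inference's signature and its four return values, using the Gradio 3.x API (every component, label, and default below is an assumption, not the repo's actual arguments):

# Assumed wiring, not from the commit. type="filepath" makes Gradio hand
# debug_inference a path on disk, which asr_model.transcribe() accepts.
gr.Interface(
    fn=debug_inference,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Textbox(lines=10, label="Prompt"),
        gr.Textbox(label="Completion model", value="text-davinci-002"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature"),
        "state",
    ],
    outputs=["text", "state", "json", "text"],  # transcript, state, infers, text
).launch()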