File size: 1,737 Bytes
5b5d4af
 
11f7102
5b5d4af
 
006d225
6c3ad82
 
 
 
 
5b5d4af
9ca6873
6c3ad82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ca6873
 
c78ff8f
 
 
5b5d4af
 
 
 
 
9ca6873
 
5b5d4af
 
6c3ad82
5b5d4af
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os

import gradio as gr
import numpy as np
import requests
import whisper

# Load the Whisper speech-to-text model once at import time
# (downloads/reads model weights — slow on first run).
model = whisper.load_model("base")
# Bloom text generation via the Hugging Face Inference API.
API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
# NOTE(review): raises KeyError at import time if HF_TOKEN is not set in the
# environment — intentional fail-fast for a hosted Space.
HF_TOKEN = os.environ["HF_TOKEN"]
headers = {"Authorization": f"Bearer {HF_TOKEN}"}


def fun(audio): #, state=''):
  """Transcribe *audio* with Whisper, then send the transcript to Bloom.

  Args:
    audio: filepath to the recorded clip (the Gradio Audio component is
      configured with type="filepath").

  Returns:
    (transcript, bloom_response) — one value per output textbox.
  """
  transcript = model.transcribe(audio)["text"]
  # BUG FIX: the original passed the undefined name `text` here, raising
  # NameError on every call; it meant the transcript computed above.
  response = lang_model_response(transcript)
  return transcript, response
  
def lang_model_response(prompt, top_p=0.90, temp=1.1):
  """Query the Bloom inference API with *prompt* and return the generated text.

  Args:
    prompt: user text; replaced by a default prompt when empty.
    top_p: nucleus-sampling cutoff. BUG FIX: this was an undefined global
      in the original (NameError on every call); now a parameter with the
      documented default.
    temp: sampling temperature. BUG FIX: same undefined-global problem.

  Returns:
    The generated text, truncated at the first follow-up question marker.
  """
  print(f"*****Inside meme_generate - Prompt is :{prompt}")
  if len(prompt) == 0:
    prompt = """Can you help me please?"""

  json_ = {
      "inputs": prompt,
      "parameters": {
          "top_p": top_p,  # 0.90 default
          "max_new_tokens": 64,
          "temperature": temp,  # 1.1 default
          "return_full_text": True,
          "do_sample": True,
      },
      "options": {
          "use_cache": True,
          # Block until the model is loaded instead of getting a 503.
          "wait_for_model": True,
      },
  }
  response = requests.post(API_URL, headers=headers, json=json_)
  print(f"Response  is : {response}")
  output = response.json()
  print(f"output is : {output}")
  output_tmp = output[0]['generated_text']
  print(f"output_tmp is: {output_tmp}")
  # BUG FIX: the original `output_tmp[0]` returned only the FIRST CHARACTER
  # of the generation; the commented-out split shows the intended behavior —
  # keep everything up to the first "\nQ:" continuation.
  solution = output_tmp.split("\nQ:")[0]
  print(f"Final response after splits is: {solution}")

  return solution
  
def fun1(audio, state=''):
  """Transcribe *audio* and append it to the running *state* transcript.

  Returns the updated transcript twice: once for the visible textbox and
  once to feed back as the accumulated state.
  """
  spoken = model.transcribe(audio)["text"]
  updated = f"{state}{spoken} "
  return updated, updated
  
# Wire the transcribe-then-Bloom pipeline into a live Gradio UI.
# `fun` returns two strings, matching the two output textboxes.
# NOTE(review): `source=` on gr.Audio was removed in Gradio 4.x (renamed
# `sources=[...]`) — confirm the pinned gradio version before upgrading.
gr.Interface(
    title = 'Testing Whisper', 
    fn=fun, 
    inputs=[
        gr.Audio(source="microphone",  type="filepath"), #streaming = True,
       # "state"
    ],
    outputs=[
        "textbox",  "textbox"
    ],
    live=True).launch()