Spaces:

AkashMnd
/

Gandalf

Sleeping

App Files Files Community

Gandalf / app.py

AkashMnd

Update app.py

d7e9e68 7 months ago

raw

history blame contribute delete

No virus

3.36 kB

	import base64
	import io
	import json
	import os

	import gradio as gr
	import numpy as np
	import requests
	import soundfile as sf

	# Endpoint settings are read from the environment once, at import time.
	# Each name is None when its variable is not set.
	API_URL = os.environ.get("API_URL")    # target of the POST in send_to_api
	API_KEY = os.environ.get("API_KEY")    # sent as the "authorization" header
	API_URL2 = os.environ.get("API_URL2")  # target of the POST in second_api_call
	def audio_to_base64(audio):
	    """Run one recording through transcription and then the text model.

	    Despite the name, this is the complete handler wired into the UI: it
	    encodes the recording as base64 WAV, sends it to ``send_to_api``,
	    extracts the transcribed text from the JSON reply and passes that text
	    to ``second_api_call``.

	    Args:
	        audio: A ``(sample_rate, samples)`` pair, unpacked in that order and
	            handed to ``soundfile.write``. ``None`` is treated as "nothing
	            was recorded" (presumably what the UI passes for an empty
	            submission -- confirm against the Gradio version in use).

	    Returns:
	        The string returned by ``second_api_call``, or a short notice when
	        there was no audio or no transcribed text to forward.
	    """
	    if audio is None:
	        return "No audio was received. Please record a message and submit again."

	    sr, data = audio

	    # Encode the WAV in memory. A fixed "temp.wav" in the working directory
	    # would be shared by concurrent requests and left behind whenever a
	    # later step raised before the cleanup ran.
	    wav_buffer = io.BytesIO()
	    sf.write(wav_buffer, data, sr, format='wav')
	    base64_audio = base64.b64encode(wav_buffer.getvalue()).decode("utf-8")

	    response_text = send_to_api(base64_audio)
	    response_json = json.loads(response_text)

	    # Forward every segment, not just the first, so longer recordings are
	    # not cut short. Joining without a separator keeps a single-segment
	    # reply identical to that segment's text.
	    # NOTE(review): assumes multi-segment texts carry their own spacing.
	    segments = response_json["output"]["segments"]
	    output_text = "".join(segment["text"] for segment in segments)
	    if not output_text.strip():
	        return "No speech was detected. Please try recording again."

	    # Make the second API call with the transcribed text as the prompt.
	    return second_api_call(output_text)

	def send_to_api(base64_audio, timeout=120):
	    """POST a base64-encoded recording to ``API_URL`` and return the raw reply.

	    Args:
	        base64_audio: The recording as a base64 string; sent as
	            ``input.audio_base64`` in the JSON payload.
	        timeout: Seconds ``requests`` waits on the server before giving up.
	            Without a timeout the call could block indefinitely.

	    Returns:
	        The response body as text (the caller parses it as JSON).

	    Raises:
	        requests.exceptions.Timeout: If the server does not answer in time.
	        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
	            status, so an error page is not passed on as a transcription.
	    """
	    # Transcription options are fixed for every request; only the audio varies.
	    payload = {
	        "input": {
	            "audio_base64": base64_audio,
	            "model": "tiny",
	            "transcription": "plain text",
	            "translate": True,
	            "language": "en",
	            "temperature": 0,
	            "best_of": 5,
	            "beam_size": 5,
	            "patience": 1,
	            "suppress_tokens": "-1",
	            "condition_on_previous_text": False,
	            "temperature_increment_on_fallback": 0.2,
	            "compression_ratio_threshold": 2.4,
	            "logprob_threshold": -1,
	            "no_speech_threshold": 0.6,
	            "word_timestamps": False,
	            "initial_prompt": "You are a voice assistant for Bhuvan Portal by ISRO"
	        },
	        "enable_vad": True
	    }

	    headers = {
	        "accept": "application/json",
	        "content-type": "application/json",
	        "authorization": API_KEY
	    }

	    response = requests.post(
	        API_URL, json=payload, headers=headers, timeout=timeout
	    )
	    # Fail here with a clear HTTP error instead of letting the caller hit a
	    # confusing KeyError while parsing an error body.
	    response.raise_for_status()

	    return response.text

	def second_api_call(prompt_text, timeout=300):
	    """Send a prompt to the endpoint at ``API_URL2`` and return its text reply.

	    Args:
	        prompt_text: The prompt string; sent as ``input.prompt``.
	        timeout: Seconds ``requests`` waits on the server before giving up.
	            Without a timeout the call could block indefinitely.

	    Returns:
	        The first entry of ``output.text`` in the JSON reply, with literal
	        backslash-n sequences turned into real newlines.

	    Raises:
	        requests.exceptions.Timeout: If the server does not answer in time.
	        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
	            status.
	    """
	    # Sampling options are fixed for every request; only the prompt varies.
	    payload = {
	        "input": {
	            "prompt": prompt_text,
	            "sampling_params": {
	                "max_tokens": 2048,
	                "n": 1,
	                "best_of": None,
	                "presence_penalty": 0,
	                "frequency_penalty": 0,
	                "temperature": 0.5,
	                "top_p": 1,
	                "top_k": -1,
	                "use_beam_search": False,
	                "stop": ["USER"],
	                "ignore_eos": False,
	                "logprobs": None
	            }
	        }
	    }

	    headers = {
	        "accept": "application/json",
	        "content-type": "application/json",
	        "authorization": API_KEY
	    }

	    response = requests.post(
	        API_URL2, json=payload, headers=headers, timeout=timeout
	    )
	    # Surface HTTP failures directly rather than as a KeyError below.
	    response.raise_for_status()

	    response_json = response.json()
	    output_text = response_json["output"]["text"][0]  # Extract the "text" field
	    # The reply may contain a backslash followed by "n" as two characters;
	    # turn each such pair into an actual line break for display.
	    output_text = output_text.replace("\\n", "\n")

	    return output_text



	# Gradio front end: one microphone input, one text output, with
	# audio_to_base64 as the handler.
	demo = gr.Interface(
	    # Presentation
	    title="Voice Assistant for SIF Hackathon Our Vision (Submit Again if error pops up)",
	    description="Speak into the microphone and see the LLM response. (Faster Whisper tiny + llama13B)",
	    theme='WeixuanYuan/Soft_dark',
	    # Wiring
	    fn=audio_to_base64,
	    inputs=["microphone"],
	    outputs="text",
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()