"""Gradio demo for Voicemod's Speech-to-Speech cloud API.

The app records audio from the microphone, converts it to m4a with the
``ffmpeg`` command-line tool, uploads it to the Voicemod API together with
the selected voice id, downloads the converted audio and shows it as a
waveform video.

The API key is read from the ``apikey`` environment variable at import time.
"""
import os
import subprocess
import tempfile
import time

import gradio as gr
import requests

token = os.environ['apikey']
API_HOST = "https://api.voicemod.net"

# Voice ids sent to the API. The order must match the ``voices`` labels of the
# dropdown below, because the dropdown reports the selected *index*.
VOICE_IDS = ("banana", "bob", "pilot", "drone", "evil", "ogre", "cave")

# Upper bound (seconds) for each HTTP call so a stalled connection cannot
# block a worker forever.
REQUEST_TIMEOUT_SECONDS = 120


def getPayload(index, audio_file):
    """Convert the recording to m4a and build the multipart upload.

    Args:
        index: Position of the selected voice in ``VOICE_IDS``.
        audio_file: Path of the recorded WAV file.

    Returns:
        A ``(payload, files)`` tuple suitable for ``requests``' ``data=`` and
        ``files=`` arguments. ``files`` holds an open binary file handle that
        the caller is responsible for closing.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        OSError: If ffmpeg cannot be started or the converted file cannot be
            opened.
    """
    new_name = audio_file.replace("-0-100.wav", ".m4a")
    if new_name == audio_file:
        # The expected suffix was not present; never let ffmpeg write its
        # output on top of its own input.
        new_name = os.path.splitext(audio_file)[0] + ".converted.m4a"

    # Argument list + no shell: paths containing spaces or shell
    # metacharacters are passed to ffmpeg verbatim.
    command = ["ffmpeg", "-y", "-f", "wav", "-i", audio_file, "-f", "ipod", new_name]
    print("Executing command: " + " ".join(command))
    subprocess.run(command, check=True)

    payload = {'voiceId': VOICE_IDS[index]}
    files = [
        ('audioFile', ('audio.m4a', open(new_name, 'rb'), 'application/octet-stream'))
    ]
    return payload, files


def cleanUpLines(lines):
    """Return a list with the falsy entries (e.g. empty strings) removed."""
    return [line for line in lines if line]


def greet(index, audio_file):
    """Run the recording through the Voicemod API and return a waveform video.

    Args:
        index: Index of the selected voice as reported by the dropdown, or
            ``None`` when nothing is selected.
        audio_file: Path of the recorded audio, or ``None`` when nothing was
            recorded.

    Returns:
        The value produced by ``gr.make_waveform`` for the converted audio.

    Raises:
        gr.Error: If the input is incomplete, the local conversion fails, or
            the API call does not yield a downloadable result.
    """
    if index is None:
        raise gr.Error("Please select a voice first.")
    if not audio_file:
        raise gr.Error("Please record some audio first.")

    url = API_HOST + "/v1/cloud/audio?convertMp3=true"
    print("URL: " + url)
    print("Audio file path: ", audio_file)

    try:
        payload, files = getPayload(index, audio_file)
    except (OSError, subprocess.CalledProcessError) as err:
        raise gr.Error("Could not prepare the recording for upload.") from err

    headers = {
        'x-api-key': token,
    }
    print(payload)
    print("Before the call")
    try:
        response = requests.request(
            "POST",
            url,
            headers=headers,
            data=payload,
            files=files,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as err:
        raise gr.Error("Could not reach the Voicemod API.") from err
    finally:
        # getPayload hands us an open file handle; release it whether or not
        # the upload succeeded.
        for _field, (_filename, handle, _mime) in files:
            handle.close()

    print("After the call...")
    print(response.status_code)
    print(response.text)

    if not response.ok:
        raise gr.Error(f"The Voicemod API returned HTTP {response.status_code}.")

    try:
        jsonResp = response.json()
    except ValueError as err:
        raise gr.Error("The Voicemod API returned an unreadable response.") from err
    print(jsonResp)

    converted_url = jsonResp.get('convertedFileUrl') if isinstance(jsonResp, dict) else None
    if not converted_url:
        raise gr.Error("The Voicemod API response did not include a converted file.")

    local_path = download_file(converted_url)
    if local_path is None:
        raise gr.Error("Unable to download the converted audio.")
    return gr.make_waveform(local_path)


def download_file(url):
    """Download ``url`` into a temporary file.

    Args:
        url: Location of the file to fetch.

    Returns:
        The path of the temporary file holding the response body, or ``None``
        when the server does not answer with HTTP 200. The file is created
        with ``delete=False`` and is not removed by this function.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    if response.status_code != 200:
        print("Error: Unable to download file")
        return None

    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(response.content)
        tmp_file.flush()
        return tmp_file.name


with gr.Blocks() as demo:
    gr.Markdown("""
    ## Voicemod's Speech-to-Speech API

    **Try some of our voice filters on your own voice with this demo!**

    To use the demo, follow the instructions:
      1. First, select the voice you want from the dropdown
      2. Then record yourself saying someting funny (or not)
      3. ???
      4. PROFIT! (or rather, play the resulting audio)

    ### Recommendation

    For better results, make sure you leave a small amount of silence at the end of the recording (1 second or less is fine). Also, a little bit of acting never hurts. 
    Especially for the ones like **Evil**, results are a lot more fun when you do some acting while speaking :)

    ## Join our Community

    If you'd like to know more and meet other developers working with this technology, join our [Discord Server](https://discord.gg/vm-dev-community)!
    """)

    # Display labels; positions must stay aligned with VOICE_IDS.
    voices = [
        "Banana",
        "Bob",
        "Pilot",
        "Drone",
        "Evil",
        "Ogre",
        "Cave",
    ]

    with gr.Row():
        with gr.Column():
            with gr.Row():
                # type="index" makes the callback receive the position of the
                # chosen label rather than the label itself.
                dd = gr.Dropdown(choices=voices, type="index", label="1. Select the voice...")
                audioInput = gr.Audio(
                    type="filepath",
                    label="2. Record yourself saying something...",
                    source="microphone",
                )
            with gr.Row():
                btn = gr.Button("Run")
        with gr.Column():
            video = gr.Video(label="3. Play the generated audio")
            video.style(height=300)

    gr.Markdown("""
    ## Want to use this API for your project?

    If you'd like to use this API for your own project, request access through our [form here](https://voicemod.typeform.com/to/KqeNN6bO) and join our [Dev Community on Discord](https://discord.gg/vm-dev-community)!
    """)

    btn.click(fn=greet, inputs=[dd, audioInput], outputs=video)

demo.launch()