texttoaudioapi / main.py
prakharC47's picture
Update main.py
4fcd1c3 verified
raw
history blame contribute delete
No virus
1.41 kB
import io

import chardet
import soundfile as sf
import torch
from fastapi import FastAPI
from fastapi import Response
from IPython.display import Audio
from transformers import AutoProcessor
from transformers import BarkModel
def pre_process_text(text):
    """Return *text* as a ``str``, decoding byte input when necessary.

    A Python 3 ``str`` is already Unicode and has no ``.decode`` method, so
    string input is returned unchanged.  Bytes-like input is decoded as UTF-8
    first; only when that fails is ``chardet`` asked to guess the encoding.

    Args:
        text: A ``str`` or a bytes-like object (``bytes``, ``bytearray`` or
            ``memoryview``).

    Returns:
        The text as a ``str``.

    Raises:
        TypeError: If *text* is neither ``str`` nor bytes-like.
    """
    if isinstance(text, str):
        return text
    if not isinstance(text, (bytes, bytearray, memoryview)):
        raise TypeError(
            f"text must be str or bytes-like, not {type(text).__name__}"
        )

    raw = bytes(text)
    try:
        # Strict UTF-8 is tried first: it is the common case and needs no
        # guessing.
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        pass

    # chardet may report ``None`` when it cannot guess; fall back to UTF-8.
    guessed = chardet.detect(raw)["encoding"] or "utf-8"
    try:
        return raw.decode(guessed, errors="replace")
    except LookupError:
        # The guessed codec name is unknown to this Python build.
        return raw.decode("utf-8", errors="replace")
# Run on the first CUDA device when torch can see one, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the Bark checkpoint once at import time and move it to that device.
model = BarkModel.from_pretrained("suno/bark-small").to(device)

# NOTE(review): the processor is loaded from "suno/bark" while the model is
# "suno/bark-small" -- confirm this pairing is intentional.
processor = AutoProcessor.from_pretrained("suno/bark")

# ASGI application object; the route handlers below register themselves on it.
app = FastAPI()
@app.get("/")
def root():
    """Return the service name for requests to the index route."""
    service_name = "ttsapi"
    return service_name
@app.get("/infer")
def tts(input="this is testing ustable space"):
    """Synthesize speech for *input* and return it as a WAV attachment.

    Args:
        input: Text to speak, taken from the ``input`` query parameter.  The
            name shadows the builtin but is kept because it is the public
            query-parameter name.

    Returns:
        A ``Response`` whose body is the WAV-encoded audio, served with the
        ``audio/wav`` media type and an ``audio.wav`` attachment filename.
    """
    # Tokenize the caller's text and move the tensors next to the model.
    inputs = processor(input)
    speech_output = model.generate(**inputs.to(device))
    sampling_rate = model.generation_config.sample_rate

    # Only the first generated item is returned; soundfile needs a NumPy
    # array on the CPU.
    waveform = speech_output[0].cpu().numpy()

    # Encode in memory.  A file-like target has no extension to infer the
    # container from, so the format is given explicitly.
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, waveform, samplerate=sampling_rate, format="WAV")

    return Response(
        content=wav_buffer.getvalue(),
        media_type="audio/wav",
        headers={"Content-Disposition": "attachment; filename=audio.wav"},
    )