audiocraft-hackathon / audio_craft_hackathon_story_working.py
ehmargondal's picture
Upload 2 files
62b7343
raw
history blame contribute delete
No virus
4.54 kB
# -*- coding: utf-8 -*-
"""Audio Craft Hackathon Story Working.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1L2rUzh1qFdVpFOHxLSEPkHACiyQv812n
"""
# --- Environment setup -------------------------------------------------------
# These are notebook shell escapes (`!cmd`), not Python statements; they only
# work when the file is executed as notebook cells.
#
# NOTE(review): `!source venv/bin/activate` presumably runs in a throwaway
# subshell, so the virtualenv created here is likely never active for the
# `pip install` lines that follow — confirm whether the venv steps are needed.
!pip install virtualenv
!virtualenv venv
!source venv/bin/activate
# Print GPU status so the runtime type is visible in the notebook output.
!nvidia-smi
!pip install --upgrade --quiet pip
# transformers is installed from the main branch of its Git repository.
!pip install --quiet git+https://github.com/huggingface/transformers.git datasets[audio]
# NOTE(review): none of the packages below are version-pinned, so a fresh run
# picks up whatever the latest releases are.
!pip install gTTS
!pip install gradio
!pip install pydub
!pip install nltk
!pip install openai
!pip install torchaudio
# --- Model setup ---------------------------------------------------------------
# Load the small MusicGen checkpoint once at module level; get_bgm() uses this
# `model` global for generation.
from transformers import MusicgenForConditionalGeneration
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
import torch
# Prefer the first CUDA device when one is available, otherwise run on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model.to(device);
# Clip length in seconds for 256 generated tokens (the same max_new_tokens value
# get_bgm() passes to model.generate). The bare expression below only displays
# the value when run as a notebook cell.
audio_length_in_s = 256 / model.config.audio_encoder.frame_rate
audio_length_in_s
from transformers import AutoProcessor
# Processor matching the checkpoint above.
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
from datasets import load_dataset
# Stream the GTZAN training split and pull its first audio example.
# NOTE(review): `dataset` and `sample` are not referenced anywhere else in the
# visible code — presumably left over from a notebook demo; confirm before
# removing.
dataset = load_dataset("sanchit-gandhi/gtzan", split="train", streaming=True)
sample = next(iter(dataset))["audio"]
# Sampling rate reported by the model's audio encoder config.
sampling_rate = model.config.audio_encoder.sampling_rate
# take the first half of the audio sample
sample["array"] = sample["array"][: len(sample["array"]) // 2]
from pydub import AudioSegment
import gradio as gr
import openai
import os

# Read the OpenAI credential from the environment instead of embedding it in
# the source file. A key that has been committed to a shared or public file
# must be treated as leaked and revoked.
# If the variable is unset this is None and the OpenAI calls below will fail
# with an authentication error, which get_story()/get_music_description()
# return as text.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY
def get_story(prompt):
    """Ask the chat model to write a detailed story about ``prompt``.

    Args:
        prompt: The topic the story should be about; interpolated into the
            instruction sent to the model.

    Returns:
        The reply text when the first choice has the ``assistant`` role,
        ``None`` when it has any other role, or the exception message as a
        string if anything raised while calling the API or reading the reply.

    NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 client interface —
    confirm the installed ``openai`` version still provides it.
    """
    try:
        conversation = [
            {"role": "user", "content": f"You are a professional story teller and you will have to write a detailed story. Please Generate a Story about the following {prompt}"},
        ]
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=conversation,
        )
        reply = completion["choices"][0]["message"]
        # Only an assistant-authored reply counts as a story.
        if reply["role"] != "assistant":
            return None
        return reply["content"]
    except Exception as err:
        # Errors are reported to the caller as plain text rather than raised.
        return str(err)
def get_music_description(story):
    """Ask the chat model for a one-line description of a story's theme song.

    Args:
        story: The story text; interpolated into the instruction sent to the
            model.

    Returns:
        The reply text when the first choice has the ``assistant`` role,
        ``None`` when it has any other role, or the exception message as a
        string if anything raised while calling the API or reading the reply.

    NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 client interface —
    confirm the installed ``openai`` version still provides it.
    """
    try:
        conversation = [
            {"role": "user", "content": f"You are a Audio and you will have to give text descirption for the theme song of a story. Please Generate a Generate One Line Audio Description about the following Story: {story}"},
        ]
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=conversation,
        )
        reply = completion["choices"][0]["message"]
        # Only an assistant-authored reply counts as a description.
        if reply["role"] != "assistant":
            return None
        return reply["content"]
    except Exception as err:
        # Errors are reported to the caller as plain text rather than raised.
        return str(err)
import scipy
# Sampling rate from the model's audio encoder config; get_bgm() returns it
# together with the generated samples. This repeats the identical assignment
# made earlier in the module.
sampling_rate = model.config.audio_encoder.sampling_rate
import numpy as np
def get_bgm(prompt):
    """Generate a short theme-music clip from a text description.

    Uses the module-level MusicGen ``model``, ``processor``, ``device`` and
    ``sampling_rate``.

    Args:
        prompt: Text description of the music to generate.

    Returns:
        A ``(sampling_rate, samples)`` tuple where ``samples`` is
        ``audio_values[0, 0]`` moved to the CPU and converted to a NumPy
        array (presumably the first channel of the single generated clip —
        confirm against the MusicGen output layout).
    """
    # Reuse the processor loaded once at module level. Re-creating it with
    # AutoProcessor.from_pretrained() on every request repeated the same
    # loading work for an identical checkpoint.
    inputs = processor(
        text=[prompt],
        padding=True,
        return_tensors="pt",
    )
    audio_values = model.generate(
        **inputs.to(device),
        do_sample=True,
        guidance_scale=3,
        max_new_tokens=256,
    )
    return sampling_rate, audio_values[0, 0].cpu().numpy()
import requests
def get_narration(story):
    """Synthesize spoken narration for ``story`` and save it as an MP3 file.

    Sends the text to the ElevenLabs text-to-speech endpoint and streams the
    returned audio into ``narration.mp3`` in the current working directory.

    Args:
        story: The text to be narrated.

    Returns:
        The path of the written audio file (``"narration.mp3"``).

    Raises:
        RuntimeError: If the ``ELEVENLABS_API_KEY`` environment variable is
            not set.
        requests.HTTPError: If the service answers with an error status.
            Previously the error body was silently written to the ``.mp3``
            file and returned as if it were audio.
        requests.RequestException: On connection failures or timeouts.
    """
    import os  # local import keeps this block self-contained

    output_path = "narration.mp3"
    chunk_size = 1024
    url = "https://api.elevenlabs.io/v1/text-to-speech/XB0fDUnXU5powFXDhCwa"

    # The credential comes from the environment; it must never be embedded in
    # the source file.
    api_key = os.environ.get("ELEVENLABS_API_KEY")
    if not api_key:
        raise RuntimeError(
            "ELEVENLABS_API_KEY is not set; cannot request narration audio."
        )

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key,
    }
    payload = {
        "text": story,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    # stream=True makes iter_content() actually stream instead of buffering the
    # whole body first; the timeout (connect, read) keeps a stalled request
    # from hanging the UI forever; the context manager releases the connection.
    with requests.post(
        url,
        json=payload,
        headers=headers,
        stream=True,
        timeout=(10, 300),
    ) as response:
        # Fail loudly on 4xx/5xx rather than saving an error payload as audio.
        response.raise_for_status()
        with open(output_path, "wb") as audio_file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    audio_file.write(chunk)
    return output_path
def generate_story_bgs(prompt):
    """Run the full pipeline for one user prompt.

    Writes a story, derives a music description from it, generates the theme
    music, then produces the narration audio — in that order.

    Args:
        prompt: What the story should be about.

    Returns:
        A ``(story, bgm, narration)`` tuple: the story text, the value
        returned by ``get_bgm``, and the value returned by ``get_narration``.
    """
    story_text = get_story(prompt)
    theme_audio = get_bgm(get_music_description(story_text))
    narration_path = get_narration(story_text)
    return story_text, theme_audio, narration_path
# Gradio UI: one text prompt in; story text, theme music and narration out.
# Components are taken from the top-level `gr` namespace. The older
# `gr.inputs.*` / `gr.outputs.*` namespaces were deprecated in Gradio 3 and
# removed in Gradio 4, so they fail on a current unpinned install.
iface = gr.Interface(
    fn=generate_story_bgs,
    inputs=[
        gr.Textbox(type="text", label="What do you want your story to be about?"),
    ],
    outputs=[
        gr.Textbox(type="text", label="Story will appear here"),
        # get_bgm() returns a (sampling_rate, ndarray) tuple.
        gr.Audio(type="numpy", label="Theme Music Will Appear here"),
        # get_narration() returns the path of the saved MP3 file.
        gr.Audio(type="filepath", label="Narration"),
    ],
    live=False,
)
# Enable request queuing, then start the server with a public share link and
# debug output.
iface.queue().launch(share=True, debug=True)
# Write the currently installed package versions to requirements.txt
# (notebook shell escape).
# NOTE(review): this comes after launch(debug=True), which presumably blocks
# until the server is stopped — confirm this line runs when expected.
!pip freeze > requirements.txt