Spaces:

ehmargondal
/

audiocraft-hackathon

Build error

App Files Files Community

audiocraft-hackathon / audio_craft_hackathon_story_working.py

ehmargondal

Upload 2 files

62b7343 about 1 year ago

raw

history blame contribute delete

No virus

4.54 kB

	# -*- coding: utf-8 -*-
	"""Audio Craft Hackathon Story Working.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1L2rUzh1qFdVpFOHxLSEPkHACiyQv812n
	"""

	# Environment setup. Lines starting with `!` are notebook shell escapes (the
	# header says this file was generated by Colaboratory); they are not Python
	# statements.
	!pip install virtualenv
	!virtualenv venv

	# NOTE(review): presumably each `!` command runs in its own subshell, so this
	# activation would not carry over to the `!pip install` lines below — confirm.
	!source venv/bin/activate

	# Print NVIDIA GPU status for the current runtime.
	!nvidia-smi

	# Upgrade pip, then install transformers from its GitHub repository together
	# with the `datasets` package and its audio extra.
	!pip install --upgrade --quiet pip
	!pip install --quiet git+https://github.com/huggingface/transformers.git datasets[audio]

	# Remaining dependencies (all unpinned).
	# NOTE(review): gTTS, nltk and torchaudio are installed here but are not
	# imported anywhere in the visible code.
	!pip install gTTS
	!pip install gradio
	!pip install pydub
	!pip install nltk
	!pip install openai
	!pip install torchaudio

	# Load the pretrained "facebook/musicgen-small" checkpoint.
	from transformers import MusicgenForConditionalGeneration
	model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")

	import torch

	# Run on the first CUDA device when one is available, otherwise on the CPU.
	device = "cuda:0" if torch.cuda.is_available() else "cpu"
	model.to(device);

	# 256 divided by the audio encoder's frame rate; 256 is also the
	# max_new_tokens value used by get_bgm(). Assuming frame_rate is in frames
	# per second, this is the clip length in seconds — TODO confirm.
	audio_length_in_s = 256 / model.config.audio_encoder.frame_rate

	# Bare expression left over from a notebook cell (where it displays the
	# value); it has no effect when run as a script.
	audio_length_in_s

	from transformers import AutoProcessor

	# Processor for the same checkpoint as the model.
	processor = AutoProcessor.from_pretrained("facebook/musicgen-small")

	from datasets import load_dataset

	# Stream the "train" split of the GTZAN dataset and take the audio entry of
	# its first example.
	# NOTE(review): `dataset` and `sample` are not referenced again in the
	# visible code.
	dataset = load_dataset("sanchit-gandhi/gtzan", split="train", streaming=True)
	sample = next(iter(dataset))["audio"]
	# Sampling rate reported by the model's audio encoder config.
	sampling_rate = model.config.audio_encoder.sampling_rate
	# Keep only the first half of the audio sample.
	sample["array"] = sample["array"][: len(sample["array"]) // 2]

	from pydub import AudioSegment
	import gradio as gr
	import openai
	import os

	# Read the OpenAI key from the environment instead of hard-coding it. A key
	# committed to source is readable by anyone who can see the file and must be
	# treated as compromised: revoke the old key and issue a new one.
	# OPENAI_API_KEY is None when the environment variable is not set.
	OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
	openai.api_key = OPENAI_API_KEY

	def get_story(prompt):
	    """Ask gpt-3.5-turbo to write a detailed story about *prompt*.

	    Returns the text of the first choice's message when its role is
	    "assistant", and None when it is not. If anything raises along the way,
	    the exception's string form is returned in place of a story.
	    """
	    try:
	        completion = openai.ChatCompletion.create(
	            model="gpt-3.5-turbo",
	            messages=[
	                {
	                    "role": "user",
	                    "content": f"You are a professional story teller and you will have to write a detailed story. Please Generate a Story about the following {prompt}",
	                },
	            ],
	        )
	        reply = completion["choices"][0]["message"]
	        # Only an assistant-authored message counts as a story.
	        return reply["content"] if reply["role"] == "assistant" else None
	    except Exception as err:
	        # Best effort: hand the error text back to the caller.
	        return str(err)

	def get_music_description(story):
	    """Ask gpt-3.5-turbo for a one-line theme-song description of *story*.

	    Returns the text of the first choice's message when its role is
	    "assistant", and None when it is not. If anything raises along the way,
	    the exception's string form is returned instead.
	    """
	    try:
	        completion = openai.ChatCompletion.create(
	            model="gpt-3.5-turbo",
	            messages=[
	                {
	                    "role": "user",
	                    "content": f"You are a Audio and you will have to give text descirption for the theme song of a story. Please Generate a Generate One Line Audio Description about the following Story: {story}",
	                },
	            ],
	        )
	        reply = completion["choices"][0]["message"]
	        # Only an assistant-authored message is used as the description.
	        return reply["content"] if reply["role"] == "assistant" else None
	    except Exception as err:
	        # Best effort: hand the error text back to the caller.
	        return str(err)

	import scipy

	# Sampling rate from the model's audio encoder config; get_bgm() returns it
	# alongside each generated clip. The same assignment already appears earlier
	# in this file.
	sampling_rate = model.config.audio_encoder.sampling_rate

	import numpy as np
	def get_bgm(prompt, max_new_tokens=256):
	    """Generate a music clip from a text description with MusicGen.

	    Args:
	        prompt: Text description of the music to generate.
	        max_new_tokens: Upper bound on the number of tokens generated. The
	            default is the value that used to be hard-coded here.

	    Returns:
	        A ``(sampling_rate, samples)`` tuple. ``samples`` is element
	        ``[0, 0]`` of the generated tensor (presumably first batch item,
	        first channel — confirm against the MusicGen docs), moved to the CPU
	        and converted to a NumPy array.
	    """
	    # Reuse the module-level `processor` rather than calling
	    # AutoProcessor.from_pretrained() again on every request.
	    inputs = processor(
	        text=[prompt],
	        padding=True,
	        return_tensors="pt",
	    )
	    audio_values = model.generate(
	        **inputs.to(device),
	        do_sample=True,
	        guidance_scale=3,
	        max_new_tokens=max_new_tokens,
	    )
	    return sampling_rate, audio_values[0, 0].cpu().numpy()

	import requests

	def get_narration(story):
	    """Synthesize *story* to speech through the ElevenLabs text-to-speech API.

	    The API key is read from the ``ELEVENLABS_API_KEY`` environment variable
	    rather than being embedded in the source. A key that was committed to
	    source must be treated as compromised and replaced.

	    Args:
	        story: Text to narrate.

	    Returns:
	        The path of the MP3 file that was written ("narration.mp3").

	    Raises:
	        RuntimeError: If ``ELEVENLABS_API_KEY`` is not set.
	        requests.HTTPError: If the API answers with an error status.
	    """
	    import os

	    output_path = "narration.mp3"
	    chunk_size = 1024
	    url = "https://api.elevenlabs.io/v1/text-to-speech/XB0fDUnXU5powFXDhCwa"

	    api_key = os.environ.get("ELEVENLABS_API_KEY")
	    if not api_key:
	        raise RuntimeError(
	            "ELEVENLABS_API_KEY environment variable is not set"
	        )

	    headers = {
	        "Accept": "audio/mpeg",
	        "Content-Type": "application/json",
	        "xi-api-key": api_key,
	    }
	    data = {
	        "text": story,
	        "model_id": "eleven_monolingual_v1",
	        "voice_settings": {
	            "stability": 0.5,
	            "similarity_boost": 0.5,
	        },
	    }

	    # stream=True lets the body be written chunk by chunk instead of being
	    # buffered in memory first; the timeout keeps a stalled connection from
	    # hanging the request forever.
	    with requests.post(
	        url, json=data, headers=headers, stream=True, timeout=300
	    ) as response:
	        # Fail loudly on an error response instead of saving the error
	        # payload under an .mp3 name.
	        response.raise_for_status()
	        with open(output_path, "wb") as f:
	            for chunk in response.iter_content(chunk_size=chunk_size):
	                if chunk:
	                    f.write(chunk)
	    return output_path



	def generate_story_bgs(prompt):
	    """Produce a story, its theme music and its narration from *prompt*.

	    Runs the steps in order: write the story, describe a theme song for it,
	    generate music from that description, then narrate the story.

	    Returns:
	        A ``(story, bgm, narration)`` tuple holding the results of
	        get_story(), get_bgm() and get_narration() respectively.
	    """
	    story_text = get_story(prompt)
	    theme_music = get_bgm(get_music_description(story_text))
	    narration_path = get_narration(story_text)
	    return story_text, theme_music, narration_path

	# Build the UI from top-level Gradio components. The `gr.inputs` /
	# `gr.outputs` namespaces used previously were deprecated in Gradio 3 and
	# removed in Gradio 4, so they break with the unpinned `pip install gradio`
	# earlier in this file. The top-level components work as both inputs and
	# outputs.
	iface = gr.Interface(
	    fn=generate_story_bgs,
	    inputs=[gr.Textbox(label="What do you want your story to be about?")],
	    outputs=[
	        gr.Textbox(label="Story will appear here"),
	        # get_bgm() returns a (sampling_rate, numpy array) tuple.
	        gr.Audio(type="numpy", label="Theme Music Will Appear here"),
	        # get_narration() returns the path of the MP3 it wrote.
	        gr.Audio(type="filepath", label="Narration"),
	    ],
	    live=False,
	)

	# Enable request queuing, then start the app with a public share link and
	# debug mode on.
	iface.queue().launch(share=True, debug=True)

	# Notebook shell escape (not Python): write the output of `pip freeze` to
	# requirements.txt.
	!pip freeze > requirements.txt