Spaces:

ehmargondal
/

audiocraft-hackathon

Build error

App Files Files Community

ehmargondal committed on Aug 31, 2023

Commit

43549bc

•

1 Parent(s): e22d339

Upload app.py

Browse files

Files changed (1) hide show

app.py +155 -0

app.py ADDED Viewed

	@@ -0,0 +1,155 @@

+# -*- coding: utf-8 -*-
+"""Audio Craft Hackathon Story Working.ipynb
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/1L2rUzh1qFdVpFOHxLSEPkHACiyQv812n
+"""
+!pip install virtualenv
+!virtualenv venv
+!source venv/bin/activate
+!nvidia-smi
+!pip install --upgrade --quiet pip
+!pip install --quiet git+https://github.com/huggingface/transformers.git datasets[audio]
+!pip install gTTS
+!pip install gradio
+!pip install pydub
+!pip install nltk
+!pip install openai
+!pip install torchaudio
+# Module-level setup: load the MusicGen checkpoint once at import time so the
+# request handlers below can reuse it.
+from transformers import MusicgenForConditionalGeneration
+model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+import torch
+# Use the first CUDA device when one is available, otherwise fall back to CPU.
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+model.to(device);
+# 256 matches the max_new_tokens value passed to model.generate() in get_bgm;
+# presumably this is the clip length in seconds for that token budget
+# (assumes frame_rate is tokens per second - TODO confirm).
+audio_length_in_s = 256 / model.config.audio_encoder.frame_rate
+# NOTE(review): bare expression left over from the notebook (it displayed the
+# value in a cell); it has no effect when run as a script.
+audio_length_in_s
+# Processor that turns text prompts into model inputs for the same checkpoint.
+from transformers import AutoProcessor
+processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+# Stream the GTZAN training split and pull the "audio" entry of its first item.
+# NOTE(review): `dataset` and `sample` are not referenced by any later code
+# visible in this file - confirm whether this download is still needed.
+from datasets import load_dataset
+dataset = load_dataset("sanchit-gandhi/gtzan", split="train", streaming=True)
+sample = next(iter(dataset))["audio"]
+# Sampling rate reported by the model's audio encoder config; returned
+# alongside the generated waveform by get_bgm.
+sampling_rate = model.config.audio_encoder.sampling_rate
+# take the first half of the audio sample
+sample["array"] = sample["array"][: len(sample["array"]) // 2]
+from pydub import AudioSegment
+import gradio as gr
+import openai
+OPENAI_API_KEY = "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"
+openai.api_key = OPENAI_API_KEY
+def get_story(prompt):
+    try:
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "user", "content": f"You are a professional story teller and you will have to write a detailed story. Please Generate a Story about the following {prompt}"},
+            ]
+        )
+        response_message = response["choices"][0]["message"]
+        if response_message["role"] == "assistant":
+            return response_message["content"]
+    except Exception as e:
+        return str(e)
+def get_music_description(story):
+   try:
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "user", "content": f"You are a Audio and you will have to give text descirption for the theme song of a story. Please Generate a Generate One Line Audio Description about the following Story: {story}"},
+            ]
+        )
+        response_message = response["choices"][0]["message"]
+        if response_message["role"] == "assistant":
+            return response_message["content"]
+   except Exception as e:
+        return str(e)
+import scipy
+sampling_rate = model.config.audio_encoder.sampling_rate
+import numpy as np
+def get_bgm(prompt):
+  file = "audio.wav"
+  from transformers import AutoProcessor
+  processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+  inputs = processor(
+      text=[prompt,],
+      padding=True,
+      return_tensors="pt",
+  )
+  audio_values = model.generate(**inputs.to(device), do_sample=True, guidance_scale=3, max_new_tokens=256)
+  #scipy.io.wavfile.write(file, rate=sampling_rate, data=,)
+  return sampling_rate,audio_values[0,0].cpu().numpy()
+import requests
+def get_narration(story):
+  file = "narration.mp3"
+  CHUNK_SIZE = 1024
+  url = "https://api.elevenlabs.io/v1/text-to-speech/XB0fDUnXU5powFXDhCwa"
+  headers = {
+  "Accept": "audio/mpeg",
+  "Content-Type": "application/json",
+  "xi-api-key": "7a0e6698796cdcbeaaaabf1a0abcd1ce"
+  }
+  data = {
+    "text": story,
+    "model_id": "eleven_monolingual_v1",
+    "voice_settings": {
+      "stability": 0.5,
+      "similarity_boost": 0.5
+    }
+  }
+  response = requests.post(url, json=data, headers=headers)
+  with open(file, 'wb') as f:
+      for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
+          if chunk:
+              f.write(chunk)
+  return file
+def generate_story_bgs(prompt):
+  story = get_story(prompt)
+  music_des = get_music_description(story)
+  bgm = get_bgm(music_des)
+  narration = get_narration(story)
+  return story , bgm, narration
+iface = gr.Interface(
+    fn=generate_story_bgs,
+    inputs=[gr.inputs.Textbox(type='text', label="What do you want your story to be about?")],
+    outputs=[
+        gr.outputs.Textbox(type='text', label="Story will appear here"),
+        gr.outputs.Audio(type="numpy",label="Theme Music Will Appear here"),
+        gr.outputs.Audio(type="filepath",label="Narration")
+        ],
+    live=False
+)
+iface.queue().launch(share=True, debug=True)
+!pip freeze > requirements.txt