# -*- coding: utf-8 -*-
"""Audio Craft Hackathon Story Working.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1L2rUzh1qFdVpFOHxLSEPkHACiyQv812n
"""

# Install virtualenv and create an environment named "venv".
!pip install virtualenv
!virtualenv venv

# NOTE(review): in IPython/Colab each `!` command presumably runs in its own
# subshell, so this activation likely does not persist to later commands -- confirm.
!source venv/bin/activate

# Show the GPU (if any) attached to this runtime.
!nvidia-smi

# Upgrade pip, then install transformers from the GitHub main branch together
# with the `datasets` package and its audio extras.
!pip install --upgrade --quiet pip
!pip install --quiet git+https://github.com/huggingface/transformers.git datasets[audio]

!pip install gTTS
!pip install gradio
!pip install pydub
!pip install nltk
!pip install openai
!pip install torchaudio

from transformers import MusicgenForConditionalGeneration
# Load the pretrained "facebook/musicgen-small" checkpoint; `get_bgm` calls
# `model.generate` on this object.
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")

import torch

# Use the first CUDA device when one is available, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model.to(device);  # trailing ";" presumably suppresses the notebook cell output

# Clip duration in seconds for 256 generated tokens (the same token budget
# `get_bgm` passes as `max_new_tokens`), derived from the encoder frame rate.
audio_length_in_s = 256 / model.config.audio_encoder.frame_rate

# Bare expression: displays the value when run as a notebook cell.
audio_length_in_s

from transformers import AutoProcessor

# Processor matching the "facebook/musicgen-small" checkpoint.
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")

from datasets import load_dataset

# Stream the GTZAN train split and take the audio of its first example.
# NOTE(review): `sample` is not referenced again in the visible code -- confirm
# whether this dataset load is still needed.
dataset = load_dataset("sanchit-gandhi/gtzan", split="train", streaming=True)
sample = next(iter(dataset))["audio"]
# Sampling rate of the MusicGen audio encoder.
sampling_rate = model.config.audio_encoder.sampling_rate
# take the first half of the audio sample
sample["array"] = sample["array"][: len(sample["array"]) // 2]

from pydub import AudioSegment
import gradio as gr
import openai
import os

# SECURITY: the OpenAI key must never be hard-coded in this file. It is read
# from the OPENAI_API_KEY environment variable instead. Any key that was ever
# committed to source control must be treated as leaked and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Fail fast with a clear message rather than with an authentication error
    # on the first request.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment "
        "(e.g. os.environ['OPENAI_API_KEY'] = '...') before running this script."
    )
openai.api_key = OPENAI_API_KEY

def get_story(prompt):
    """Ask gpt-3.5-turbo to write a detailed story about ``prompt``.

    Returns the assistant's reply text. If the reply's role is not
    "assistant" the function returns None. Any exception raised while
    building or sending the request is not propagated: its string form is
    returned instead, so callers receive the error text in place of a story.
    """
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "user",
                    "content": f"You are a professional story teller and you will have to write a detailed story. Please Generate a Story about the following {prompt}",
                },
            ],
        )
        reply = completion["choices"][0]["message"]
        # Only an assistant-authored message counts as a story.
        return reply["content"] if reply["role"] == "assistant" else None
    except Exception as err:
        return str(err)

def get_music_description(story):
    """Ask gpt-3.5-turbo for a one-line audio description of a story's theme song.

    Returns the assistant's reply text. If the reply's role is not
    "assistant" the function returns None. Any exception raised while
    building or sending the request is not propagated: its string form is
    returned instead.
    """
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "user",
                    "content": f"You are a Audio and you will have to give text descirption for the theme song of a story. Please Generate a Generate One Line Audio Description about the following Story: {story}",
                },
            ],
        )
        reply = completion["choices"][0]["message"]
        # Only an assistant-authored message counts as a description.
        return reply["content"] if reply["role"] == "assistant" else None
    except Exception as err:
        return str(err)

import scipy

# Sampling rate of the MusicGen audio encoder; `get_bgm` returns it alongside
# the generated samples.
sampling_rate = model.config.audio_encoder.sampling_rate

import numpy as np
def get_bgm(prompt, max_new_tokens=256):
    """Generate a music clip for a text description with MusicGen.

    Args:
        prompt: Text description of the music to generate.
        max_new_tokens: Number of tokens to generate. The clip lasts roughly
            ``max_new_tokens / model.config.audio_encoder.frame_rate`` seconds.
            Defaults to 256, the value this function previously hard-coded.

    Returns:
        A ``(sampling_rate, samples)`` tuple where ``samples`` is
        ``audio_values[0, 0]`` (presumably batch item 0, channel 0) moved to
        the CPU as a NumPy array -- the pair the ``type="numpy"`` Gradio audio
        output in this file is given.
    """
    # Reuse the module-level `processor` instead of calling
    # `AutoProcessor.from_pretrained` again on every request.
    inputs = processor(
        text=[prompt],
        padding=True,
        return_tensors="pt",
    )
    audio_values = model.generate(
        **inputs.to(device),
        do_sample=True,
        guidance_scale=3,
        max_new_tokens=max_new_tokens,
    )
    return sampling_rate, audio_values[0, 0].cpu().numpy()

import requests

def get_narration(story):
    """Synthesize ``story`` to speech via the ElevenLabs API and save it as MP3.

    Args:
        story: Text to narrate.

    Returns:
        Path of the written audio file (``"narration.mp3"``).

    Raises:
        RuntimeError: If the ``ELEVENLABS_API_KEY`` environment variable is
            not set.
        requests.HTTPError: If the API responds with an error status.
    """
    import os  # local import keeps this function self-contained

    # SECURITY: the API key is read from the environment and must never be
    # hard-coded here. Any key that was ever committed to source control
    # must be treated as leaked and revoked.
    api_key = os.environ.get("ELEVENLABS_API_KEY")
    if not api_key:
        raise RuntimeError(
            "ELEVENLABS_API_KEY is not set; define it in the environment "
            "before requesting narration."
        )

    output_path = "narration.mp3"
    chunk_size = 1024
    url = "https://api.elevenlabs.io/v1/text-to-speech/XB0fDUnXU5powFXDhCwa"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key,
    }
    payload = {
        "text": story,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    # stream=True lets iter_content write the body incrementally instead of
    # buffering it all in memory. The (connect, read) timeout keeps a stalled
    # request from hanging the UI forever.
    with requests.post(
        url, json=payload, headers=headers, stream=True, timeout=(10, 300)
    ) as response:
        # Without this check an error response (bad key, quota exceeded, ...)
        # would be saved to disk and handed to the UI as if it were audio.
        response.raise_for_status()
        with open(output_path, "wb") as audio_file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    audio_file.write(chunk)
    return output_path


def generate_story_bgs(prompt):
    """Produce a story, its theme music and its narration for ``prompt``.

    Calls, in order: ``get_story`` on the prompt, ``get_music_description``
    on the story, ``get_bgm`` on that description, and ``get_narration`` on
    the story.

    Returns:
        A ``(story, bgm, narration)`` tuple holding the three results.
    """
    story_text = get_story(prompt)
    theme_music = get_bgm(get_music_description(story_text))
    narration_path = get_narration(story_text)
    return story_text, theme_music, narration_path

# Gradio UI: one text prompt in; story text, theme music and narration out.
# Components come from the top-level `gr` namespace because the legacy
# `gr.inputs.*` / `gr.outputs.*` wrappers are deprecated and absent from newer
# Gradio releases. The top-level classes work on both.
iface = gr.Interface(
    fn=generate_story_bgs,
    inputs=[gr.Textbox(label="What do you want your story to be about?")],
    outputs=[
        gr.Textbox(label="Story will appear here"),
        # get_bgm returns a (sampling_rate, numpy array) pair.
        gr.Audio(type="numpy", label="Theme Music Will Appear here"),
        # get_narration returns the path of the MP3 file it wrote.
        gr.Audio(type="filepath", label="Narration"),
    ],
    live=False,
)

# Enable request queuing, then start the app with a public share link and
# debug output.
iface.queue().launch(share=True, debug=True)

# Snapshot the installed package versions into requirements.txt.
# NOTE(review): this line comes after `launch(debug=True)`, which presumably
# blocks until the app is stopped -- confirm when this is expected to run.
!pip freeze > requirements.txt