ehmargondal committed on
Commit
43549bc
1 Parent(s): e22d339

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -0
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Audio Craft Hackathon Story Working.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1L2rUzh1qFdVpFOHxLSEPkHACiyQv812n
8
+ """
9
+
10
# NOTE: The original Colab notebook ran IPython shell magics here. They are not
# valid Python and make a plain `app.py` fail with a SyntaxError, so they are
# kept only as documentation. Install these packages before running the app
# (for example through a requirements.txt file):
#
#   pip install --upgrade pip
#   pip install git+https://github.com/huggingface/transformers.git "datasets[audio]"
#   pip install gTTS gradio pydub nltk openai torchaudio
#
# The notebook also created a virtualenv (`virtualenv venv`, then
# `source venv/bin/activate`) and printed GPU information with `nvidia-smi`;
# neither has any effect on a script and both are omitted.
26
+
27
import torch
from datasets import load_dataset
from transformers import AutoProcessor, MusicgenForConditionalGeneration

# Load the MusicGen checkpoint once at import time and place it on the GPU
# when one is available, otherwise on the CPU.
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model.to(device)

# 256 divided by the audio encoder's frame rate; presumably the clip length in
# seconds for 256 generated tokens -- TODO confirm. Not read elsewhere in this file.
audio_length_in_s = 256 / model.config.audio_encoder.frame_rate

# Processor matching the checkpoint above.
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")

# NOTE(review): `dataset` and `sample` are not used anywhere else in this file;
# this looks like leftover tutorial code and costs a network fetch at startup.
dataset = load_dataset("sanchit-gandhi/gtzan", split="train", streaming=True)
sample = next(iter(dataset))["audio"]
sampling_rate = model.config.audio_encoder.sampling_rate
# take the first half of the audio sample
half_length = len(sample["array"]) // 2
sample["array"] = sample["array"][:half_length]
50
+
51
+ from pydub import AudioSegment
52
+ import gradio as gr
53
+ import openai
54
import os

# SECURITY: an API key used to be hard-coded on this line and is now part of
# the public commit history. Treat it as compromised: revoke it and issue a
# new one. The key is read from the environment so it never lives in source.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY
56
+
57
def get_story(prompt):
    """Ask the OpenAI chat model to write a detailed story about ``prompt``.

    Args:
        prompt: Topic text; it is interpolated into the instruction sent to
            the model.

    Returns:
        str: The content of the first returned choice when its role is
        ``"assistant"``. If anything in the request or response handling
        raises, the exception's message is returned instead (best-effort, so
        the caller always has text to display). An empty string is returned
        if the first choice is not an assistant message.
    """
    # NOTE(review): `openai.ChatCompletion` is the pre-1.0 client interface;
    # confirm the installed `openai` version still provides it.
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": f"You are a professional story teller and you will have to write a detailed story. Please Generate a Story about the following {prompt}"},
            ],
        )
        response_message = response["choices"][0]["message"]
        if response_message["role"] == "assistant":
            return response_message["content"]
    except Exception as e:
        # Errors are deliberately surfaced as text rather than raised.
        return str(e)
    # Previously this path fell through and returned None, which downstream
    # code would then format into prompts and send to the narration service.
    return ""
71
+
72
def get_music_description(story):
    """Ask the OpenAI chat model for a one-line theme-music description of ``story``.

    Args:
        story: Story text; it is interpolated into the instruction sent to
            the model.

    Returns:
        str: The content of the first returned choice when its role is
        ``"assistant"``. If anything in the request or response handling
        raises, the exception's message is returned instead. An empty string
        is returned if the first choice is not an assistant message.
    """
    # NOTE(review): `openai.ChatCompletion` is the pre-1.0 client interface;
    # confirm the installed `openai` version still provides it.
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                # Prompt wording fixed: "descirption" was misspelled and
                # "Generate a Generate" was duplicated.
                {"role": "user", "content": f"You are a Audio and you will have to give text description for the theme song of a story. Please Generate a One Line Audio Description about the following Story: {story}"},
            ],
        )
        response_message = response["choices"][0]["message"]
        if response_message["role"] == "assistant":
            return response_message["content"]
    except Exception as e:
        # Errors are deliberately surfaced as text rather than raised.
        return str(e)
    # Previously this path fell through and returned None.
    return ""
86
+
87
+ import scipy
88
+
89
+ sampling_rate = model.config.audio_encoder.sampling_rate
90
+
91
+ import numpy as np
92
def get_bgm(prompt):
    """Generate a music clip from a text description with the MusicGen model.

    Args:
        prompt: Text description of the desired music.

    Returns:
        tuple: ``(sampling_rate, samples)``. ``sampling_rate`` is the
        module-level value read from the model configuration and ``samples``
        is the ``[0, 0]`` slice of the generated tensor converted to a NumPy
        array (presumably batch item 0, channel 0 -- TODO confirm).
    """
    # Reuse the module-level `processor` instead of calling
    # `AutoProcessor.from_pretrained` again on every request.
    inputs = processor(
        text=[prompt],
        padding=True,
        return_tensors="pt",
    )
    audio_values = model.generate(
        **inputs.to(device),
        do_sample=True,
        guidance_scale=3,
        max_new_tokens=256,
    )
    return sampling_rate, audio_values[0, 0].cpu().numpy()
104
+
105
+ import requests
106
+
107
def get_narration(story):
    """Synthesize spoken narration for ``story`` via the ElevenLabs text-to-speech API.

    The audio response is streamed to ``narration.mp3`` in the current
    working directory.

    Args:
        story: Text to be narrated.

    Returns:
        str: Path of the written audio file (``"narration.mp3"``).

    Raises:
        RuntimeError: If the ``ELEVENLABS_API_KEY`` environment variable is
            not set.
        requests.HTTPError: If the API answers with an error status.
    """
    import os  # local import keeps this block self-contained

    output_path = "narration.mp3"
    chunk_size = 1024
    url = "https://api.elevenlabs.io/v1/text-to-speech/XB0fDUnXU5powFXDhCwa"

    # SECURITY: the API key used to be hard-coded here and is now part of the
    # public commit history; it should be revoked and replaced. It is read
    # from the environment so it never lives in source.
    api_key = os.environ.get("ELEVENLABS_API_KEY")
    if not api_key:
        raise RuntimeError("ELEVENLABS_API_KEY environment variable is not set")

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key,
    }
    payload = {
        "text": story,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    # stream=True makes the chunked write below actually stream; the timeout
    # stops a stalled connection from hanging the request forever.
    with requests.post(
        url, json=payload, headers=headers, stream=True, timeout=(10, 120)
    ) as response:
        # Without this check an error body would be saved as "narration.mp3".
        response.raise_for_status()
        with open(output_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
    return output_path
132
+
133
+
134
+
135
def generate_story_bgs(prompt):
    """Run the full pipeline for one prompt: story, theme music, narration.

    Args:
        prompt: What the story should be about.

    Returns:
        tuple: ``(story, bgm, narration)`` -- the result of ``get_story``,
        the result of ``get_bgm`` for a music description derived from that
        story, and the result of ``get_narration`` for the story.
    """
    story_text = get_story(prompt)
    # The music is generated from a description of the story, not from the
    # raw user prompt.
    theme_audio = get_bgm(get_music_description(story_text))
    narration_path = get_narration(story_text)
    return story_text, theme_audio, narration_path
141
+
142
# Gradio UI: one text input, three outputs (story text, theme music, narration).
# The top-level component classes replace the deprecated `gr.inputs.*` /
# `gr.outputs.*` namespaces, which recent Gradio releases no longer provide.
iface = gr.Interface(
    fn=generate_story_bgs,
    inputs=[gr.Textbox(label="What do you want your story to be about?")],
    outputs=[
        gr.Textbox(label="Story will appear here"),
        gr.Audio(type="numpy", label="Theme Music Will Appear here"),
        gr.Audio(type="filepath", label="Narration"),
    ],
    live=False,
)

# Enable the request queue, then start the server.
iface.queue().launch(share=True, debug=True)
154
+
155
# NOTE: the notebook ran the shell magic `!pip freeze > requirements.txt` here.
# It is not valid Python in a script; run `pip freeze > requirements.txt` from
# a shell instead to record the installed package versions.