Spaces:

naotokui
/

TR-ChatGPT

Running

App Files Files Community

TR-ChatGPT / app.py

naotokui

fixe

0264f32 over 1 year ago

raw

history blame contribute delete

6.5 kB

	#%%
	import openai
	import numpy as np
	import pretty_midi
	import re
	import numpy as np
	import os
	import gradio as gr
	import librosa

	openai.api_key = os.environ.get("OPENAI_API_KEY")

	# sample data
	markdown_table_sample = """8th

	\| \| 1 \| 2 \| 3 \| 4 \| 5 \| 6 \| 7 \| 8 \|
	\|----\|---\|---\|---\|---\|---\|---\|---\|---\|
	\| BD \| x \| \| x \| \| \| \| x \| \|
	\| SD \| \| \| \| x \| \| \| \| x \|
	\| CH \| x \| \| x \| \| x \| \| x \| \|
	\| OH \| \| \| \| x \| \| \| x \| \|
	\| LT \| \| \| \| \| \| x \| \| \|
	\| MT \| \| x \| \| \| x \| \| \| \|
	\| HT \| x \| \| \| x \| \| \| \| \|
	"""

	markdown_table_sample2 = """16th

	\| \| 1 \| 2 \| 3 \| 4 \| 5 \| 6 \| 7 \| 8 \| 9 \| 10\| 11\| 12\| 13\| 14\| 15\| 16\|
	\|----\|---\|---\|---\|---\|---\|---\|---\|---\|---\|---\|---\|---\|---\|---\|---\|---\|
	\| BD \| x \| \| x \| \| \| \| x \| \| x \| \| x \| \| x \| \| x \| \|
	\| SD \| \| \| \| x \| \| \| \| x \| \| \| x \| \| \| \| x \| \|
	\| CH \| x \| \| x \| \| x \| \| x \| \| x \| \| x \| \| x \| \| x \| \|
	\| OH \| \| \| \| x \| \| \| x \| \| \| \| \| x \| \| \| x \| \|
	\| LT \| \| \| \| \| \| x \| \| \| \| \| \| \| \| x \| \| \|
	\| MT \| \| x \| \| \| x \| \| \| \| \| x \| \| \| x \| \| \| \|
	\| HT \| x \| \| \| x \| \| \| \| \| x \| \| \| x \| \| \| \| \|
	"""

	MIDI_NOTENUM = {
	"BD": 36,
	"SD": 38,
	"CH": 42,
	"HH": 44,
	"OH": 46,
	"LT": 48,
	"MT": 48,
	"HT": 50,
	"CP": 50,
	"CB": 56,
	}
	SR = 44100

	MAX_QUERY = 5

	def convert_table_to_audio(markdown_table, resolution=8, bpm = 120.0):
	# convert table to array
	rhythm_pattern = []
	for line in markdown_table.split('\n')[2:]:
	rhythm_pattern.append(line.split('\|')[1:-1])
	print(rhythm_pattern)

	# table to MIDI
	pm = pretty_midi.PrettyMIDI(initial_tempo=bpm) # midi object
	pm_inst = pretty_midi.Instrument(0, is_drum=True) # midi instrument
	pm.instruments.append(pm_inst)

	note_length = (60. / bpm) * (4.0 / resolution) # note duration

	beat_num = resolution
	for i in range(len(rhythm_pattern)):
	for j in range(1, len(rhythm_pattern[i])):
	beat_num = j # for looping
	inst = rhythm_pattern[i][0].strip().upper()
	velocity = 0
	if 'x' == rhythm_pattern[i][j].strip():
	velocity = 120
	if 'o' == rhythm_pattern[i][j].strip():
	velocity = 65
	if velocity > 0:
	if inst in MIDI_NOTENUM.keys():
	midinote = MIDI_NOTENUM[inst]
	note = pretty_midi.Note(velocity=velocity, pitch=midinote, start=note_length * (j-1)+0.0001, end=note_length * j)
	pm_inst.notes.append(note)

	# convert to audio
	audio_data = pm.fluidsynth()

	# cut off the reverb section
	audio_data = audio_data[:int(SRnote_lengthbeat_num)] # for looping, cut the tail
	return audio_data

	def get_answer(question):
	response = openai.ChatCompletion.create(
	model="gpt-3.5-turbo",
	messages=[
	{"role": "system", "content": "You are a rhythm generator. "},
	{"role": "user", "content": "Please generate a rhythm pattern in a Markdown table. Time resolution is the 8th note. You use the following drums. Kick drum:BD, Snare drum:SD, Closed-hihat:CH, Open-hihat:OH, Low-tom:LT, Mid-tom:MT, High-tom:HT. use 'x' for an accented beat, 'o' for a weak beat. You need to write the time resolution first."},
	{"role": "assistant", "content": markdown_table_sample},
	# {"role": "user", "content": "Please generate a rhythm pattern. The resolution is the fourth note. You use the following drums. Kick drum:BD, Snare drum:SD, Closed-hihat:CH, Open-hihat:OH, Low-tom:LT, Mid-tom:MT, High-tom:HT. use 'x' for an accented beat, 'o' for a weak beat. You need to write the time resolution first."},
	# {"role": "assistant", "content": markdown_table_sample},
	{"role": "user", "content": question}
	]
	)
	return response["choices"][0]["message"]["content"]

	def generate_rhythm(query, state):
	print(state)
	if state["gen_count"] > MAX_QUERY and len(state["user_token"]) == 0:
	return [None, "You need to set your ChatGPT API Key to try more than %d times" % MAX_QUERY]
	state["gen_count"] = state["gen_count"] + 1

	# get respance from ChatGPT
	text_output = get_answer(query)

	# Try to use the first row as time resolution
	resolution_text = text_output.split('\|')[0]
	try:
	resolution_text = re.findall(r'\d+', resolution_text)[0]
	resolution = int(resolution_text)
	except:
	resolution = 8 # default

	# Extract rhythm table
	try:
	table = "\|" + "\|".join(text_output.split('\|')[1:-1]) + "\|"
	audio_data = convert_table_to_audio(table, resolution)

	# loop x4
	audio_data = np.tile(audio_data, 4)
	if np.max(audio_data) == 0.0:
	audio_data = np.ones(1)
	except:
	audio_data = np.ones(1)

	return [(SR, audio_data), text_output]
	# %%

	def on_token_change(user_token, state):
	print(user_token)
	openai.api_key = user_token or os.environ.get("OPENAI_API_KEY")
	state["user_token"] = user_token
	return state

	with gr.Blocks() as demo:
	state = gr.State({"gen_count": 0, "user_token":""})
	with gr.Row():
	with gr.Column():
	# gr.Markdown("Ask ChatGPT to generate rhythm patterns")
	gr.Markdown("*Hey TR-ChatGPT, give me a drum pattern!*")
	gr.Markdown("Use the following drums. Kick drum:BD, Snare drum:SD, Closed-hihat:CH, Open-hihat:OH, Low-tom:LT, Mid-tom:MT, High-tom:HT and 'x' for an accented beat, 'o' for a weak beat!")
	with gr.Row():
	with gr.Column():
	inp = gr.Textbox(placeholder="Give me a Hiphop rhythm pattern with some reggae twist!")
	btn = gr.Button("Generate")
	with gr.Column():
	out_audio = gr.Audio()
	out_text = gr.Textbox(placeholder="ChatGPT output")
	with gr.Row():
	with gr.Column():
	gr.Markdown("Enter your own OpenAI API Key to try out more than 5 times. You can get it [here](https://platform.openai.com/account/api-keys).")
	user_token = gr.Textbox(placeholder="OpenAI API Key", type="password", show_label=False)
	btn.click(fn=generate_rhythm, inputs=[inp, state], outputs=[out_audio, out_text])
	user_token.change(on_token_change, inputs=[user_token, state], outputs=[state])
	demo.launch()