ajayarora1235 committed · Commit 917b084 · Parent(s): 8b3be6a

beta version, add spaces remote

Files changed:
- ai_tools.json +97 -11
- app.py +350 -58
- chat.py +329 -84
- data/conversation_history.json +1 -0
- gpt_calls.py +35 -35
- suno.py +57 -4
- utils/song_utils.py +17 -5
ai_tools.json
CHANGED
@@ -50,6 +50,42 @@
             }
         }
     },
+    {
+        "type": "function",
+        "function": {
+            "name": "ask_question",
+            "description": "Ask a question to the user to get more information. This function should be called when the AI needs more information about the user to continue writing the song.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "clarify_arguments",
+            "description": "Ask a question to the user to get more information. This function should be called when the AI needs clarification on arguments to pass to a different function call. Should always be called for snippet_clip_to_continue_from in get_audio_snippet and whole_song in revise_instrumental_tags.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "function_name": {
+                        "type": "string",
+                        "description": "Function to call that needs clarification on arguments"
+                    },
+                    "arguments_to_clarify": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "description": "List of questions to ask the end user to clarify the arguments we don't know"
+                    }
+                },
+                "required": ["function_name", "arguments_to_clarify"]
+            }
+        }
+    },
     {
         "type": "function",
         "function": {
@@ -73,14 +109,6 @@
                     },
                     "description": "Lines to revise, as a list of numbers. The first line is labelled 1, second 2, and so on. Lines to change should be assessed based on feedback user provides and where they suggest changes."
                 },
-                "snippet_instrumental_tags": {
-                    "type": "string",
-                    "description": "Instrumental tags to be used for this section revision. This should be the exact same as what was used for the snippet being revised. The instrumental tags are listed under the recent audio clip html."
-                },
-                "snippet_clip_to_continue_from": {
-                    "type": "string",
-                    "description": "The 'continued from clip:' clip id that was used in the original snippet we are now revising. This should be the exact same as 'continued from clip' for the snippet being revised, listed under the original snippet's audioplayer; if it is None, this should not be passed."
-                },
                 "relevant_ideas": {
                     "type": "array",
                     "items": {
@@ -96,7 +124,7 @@
                     "description": "Words that are requested to be included in revised lines. Unless specific words are requested, this should be empty"
                 }
             },
-            "required": ["section_name", "current_section", "lines_to_revise"
+            "required": ["section_name", "current_section", "lines_to_revise"]
         }
     }
 },
@@ -131,6 +159,64 @@
         }
     }
 },
+{
+    "type": "function",
+    "function": {
+        "name": "revise_section_lyrics_and_instrumental",
+        "description": "Incorporates simultaneous feedback on lyrics and instrumental from a single user message. Call this when the user either has both feedback on lyric changes and instrumental changes that they ask for in a single message or they ask to regenerate a snippet with both changed lyrics and instrumental tags in mind.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "current_instrumental_tags": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    },
+                    "description": "Current instrumental tags passed to instrumental generator. Deduce from most recent list of instrumental tags in conversation."
+                },
+                "user_instrumental_feedback": {
+                    "type": "string",
+                    "description": "A single sentence summary of the user's feedback on instrumental. Can include suggested moods, genres, or instruments."
+                },
+                "section_name": {
+                    "type": "string",
+                    "description": "Name of section we are revising"
+                },
+                "current_section": {
+                    "type": "string",
+                    "description": "Current version of lyrics in section of the song. The last outputted version of the section before user suggested changes that we are now implementing."
+                },
+                "lines_to_revise": {
+                    "type": "array",
+                    "items": {
+                        "type": "number"
+                    },
+                    "description": "Lines to revise, as a list of numbers. The first line is labelled 1, second 2, and so on. Lines to change should be assessed based on feedback user provides and where they suggest changes."
+                },
+                "snippet_clip_to_continue_from": {
+                    "type": "string",
+                    "description": "The 'continued from clip:' clip id that was used in the original snippet we are now revising. This should be the exact same as 'continued from clip' for the snippet being revised, listed under the original section's snippet's audioplayer; if it is None, this should not be passed."
+                },
+                "relevant_ideas": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    },
+                    "description": "Ideas that should be included in the lines that are being changed. Should be directly derived from suggested user changes."
+                },
+                "relevant_words": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    },
+                    "description": "Words that are requested to be included in revised lines. Unless specific words are requested, this should be empty"
+                }
+
+            },
+            "required": ["current_instrumental_tags", "user_instrumental_feedback", "section_name", "current_section", "lines_to_revise"]
+        }
+    }
+},
 {
     "type": "function",
     "function": {
@@ -149,7 +235,7 @@
                 },
                 "snippet_clip_to_continue_from": {
                     "type": "string",
-                    "description": "The last approved song clip to continue from. Almost always the most recent clip id outputted in the conversation. If user did not receive a snippet for the prior section or ever before, this should not be passed."
+                    "description": "The last approved song clip to continue from. Almost always the most recent clip id outputted in the conversation. This is located in the tool message prior to the assistant message with the audio player for that audio. If user did not receive a snippet for the prior section or ever before, this should not be passed."
                 }
             },
             "required": ["snippet_lyrics", "snippet_instrumental_tags"]
@@ -208,7 +294,7 @@
                 },
                 "snippet_clip_to_continue_from": {
                     "type": "string",
-                    "description": "The last approved clip ID of the song to continue the remaining sections from. Almost always the most recent clip id outputted in the conversation. If user did not receive a snippet for the prior section or ever before, this should not be passed."
+                    "description": "The last approved clip ID of the song to continue the remaining sections from. Almost always the most recent clip id outputted in the conversation. This is located in the tool message prior to the assistant message with the audio player for that audio. If user did not receive a snippet for the prior section or ever before, this should not be passed."
                 },
                 "sections_written": {
                     "type": "array",
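Note: the new ask_question and clarify_arguments entries follow the OpenAI function-calling schema, so the whole file can be handed straight to the chat completions API (chat.py below does this with tools=ai_tools). A minimal sketch of that wiring, assuming an OPENAI_API_KEY in the environment; the prompt string is illustrative, not from the repo:

import json
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

with open("ai_tools.json") as f:
    ai_tools = json.load(f)  # the file is already a list of tool schemas

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Can we revise the chorus?"}],  # illustrative prompt
    tools=ai_tools,
    tool_choice="auto",  # let the model decide whether to call a tool
)

# Each tool call carries the function name and JSON-encoded arguments.
for tool_call in response.choices[0].message.tool_calls or []:
    print(tool_call.function.name, json.loads(tool_call.function.arguments))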
app.py
CHANGED
@@ -1,14 +1,27 @@
 from typing import List, Tuple, Dict
 import gradio as gr
+import os
+import json
 
 from utils.song_utils import generate_song_seed, get_starting_messages, messages_to_history, update_song_details, get_sections
 from chat import model_chat
-from
+from gradio_modal import Modal
 
 History = List[Tuple[str, str]] # a type: pairs of (query, response), where query is user input and response is system output
 Messages = List[Dict[str, str]] # a type: list of messages with role and content
 
-with gr.Blocks() as demo:
+css = """
+#audio-group {
+    max-height: 800px;
+    overflow-y: scroll;
+}
+"""
+
+textbox = gr.Textbox(lines=1, label='Send a message', show_label=False, placeholder='Send a message', scale=4, visible=True)
+submit = gr.Button("Send", scale=2, visible=True)
+
+
+with gr.Blocks(css=css) as demo:
     gr.Markdown("""<center><font size=8>AI Songwriter (alpha)</center>""")
     gr.Markdown("""<center><font size=4>Turning your stories into musical poetry. 2024 MIT Senior Thesis.</center>""")
 
@@ -18,8 +31,7 @@ with gr.Blocks() as demo:
         gr.Markdown("""<center><font size=4>First, let's try to find an interesting concept. Fill out the fields below and generate a song seed.</font></center>""")
         gr.Markdown("""<center><font size=3>If you're stuck, check out <a href="https://onestopforwriters.com/emotions" target="_blank">here</a>.</font></center>""")
         with gr.Row():
-            feeling_input = gr.Textbox(label='
-            songwriter_style = gr.Dropdown(label='Songwriter Style', value = "GPT 4o", choices=["GPT 4o", "d4vd (Indie Rock Ballad - Male)", "Lizzy McAlpine (Indie Pop Folk - Female)", "Phoebe Bridgers (Pop Sad Rock - Female)", "Daniel Caesar (R&B/Soul - Male)"], interactive=True)
+            feeling_input = gr.Textbox(label="What's an emotion(s) that you've been feeling a lot recently? And why?", placeholder='Enter your emotions', scale=2)
             # audio_input = gr.Audio(sources=["upload"], type="numpy", label="Instrumental",
             #                         interactive=True, elem_id="instrumental-input")
 
@@ -36,7 +48,9 @@ with gr.Blocks() as demo:
             title_input = gr.Textbox(label='Title', placeholder='Enter a song title')
             genre_input = gr.Textbox(label='Genre', placeholder='Enter a genre')
             blurb_input = gr.Textbox(label='Blurb', placeholder='Enter a one-sentence blurb')
-
+            songwriter_style = gr.Dropdown(label='Songwriter Style', value = "GPT 4o", choices=["GPT 4o", "d4vd (Indie Rock Ballad - Male)", "Lizzy McAlpine (Indie Pop Folk - Female)", "Phoebe Bridgers (Pop Sad Rock - Female)", "Daniel Caesar (R&B/Soul - Male)"], interactive=True)
+
+        instrumental_textbox = gr.TextArea(label="Song Structure", value="Verse 1: 4 measures\nChorus 1: 8 measures\nVerse 2: 8 measures\nChorus 2: 8 measures\nVerse 3: 8 measures\nChorus 3: 8 measures", visible=False, interactive=True, max_lines=3)
         gr.Markdown("""<center><font size=4>Edit these to your liking and hit 'Continue to Next Step' to start creating!</font></center>""")
 
         def open_accordion(x):
@@ -52,6 +66,7 @@ with gr.Blocks() as demo:
             song_seed = song_seed.split("Suggested Song Concept:")[1].strip()
             return song_seed
         generate_seed_button.click(generate_song_seed, inputs=[feeling_input], outputs=[instrumental_output]).then(clean_song_seed, inputs=[instrumental_output], outputs=[instrumental_output])
+        feeling_input.submit(generate_song_seed, inputs=[feeling_input], outputs=[instrumental_output]).then(clean_song_seed, inputs=[instrumental_output], outputs=[instrumental_output])
 
         def make_row_visible(x):
             return gr.Row(visible=True), gr.Markdown("""<center><font size=4>Here it is! Hit 'Approve' to confirm this concept. Edit the concept directly or hit 'Try Again' to get another suggestion.</font></center>""", visible=True)
@@ -66,37 +81,332 @@ with gr.Blocks() as demo:
             return gr.Tabs(selected=id)
         continue_btn.click(change_tab, gr.Number(1, visible=False), tabs)
 
+        # with gr.TabItem("Tutorial", id=1):
+        #     gr.Markdown("""<center><font size=4>Now, let's walkthrough writing a verse together! Start chatting with the chatbot.</font></center>""")
+
+        #     character = gr.State(value="A 18-year old boy who dreams of being a pop star that uplifts people going through the difficulties of life")
+
+        #     starting_messages, starting_history = get_starting_messages("", "Home", "Missing home", "Ballad", instrumental_textbox.value)
+
+        #     messages = gr.State(value=starting_messages)
+
+        #     section_meanings = gr.State(value="")
+        #     approve_button.click(update_song_details, inputs=[instrumental_output], outputs=[genre_input, title_input, blurb_input]).then(get_sections, inputs=[blurb_input, instrumental_output], outputs=[section_meanings])
+
+        #     # lyrics_display = gr.State("")
+
+        #     generated_audios = gr.State([])
+        #     def reset_textbox(textbox):
+        #         return ""
+
+        #     character = gr.State(value="A 18-year old boy who dreams of being a pop star that uplifts people going through the difficulties of life")
 
+        #     starting_messages, starting_history = get_starting_messages("", "Home", "Missing home", "Ballad", instrumental_textbox.value)
+        #     print(starting_history, "STARTING HISTORY")
+        #     messages = gr.State(value=starting_messages)
+        #     # messages += [{"role": "assistant", "content": "You are a songwriter. You write songs."}]
+        #     # journal_messages = gr.State(value=[journal_starting_message])
+        #     # journal_response = gr.State(value="")
+
+        #     generated_audios = gr.State(value=[])
+        #     tutorial_step = gr.Number(0, visible=False)
+
+        #     with gr.Row():
+        #         with gr.Column(scale=2):
+        #             chatbot_history = gr.Chatbot(type="messages", value=starting_history, label='SongChat', placeholder=None, layout='bubble', bubble_full_width=False, height=500)
+        #             with gr.Row():
+        #                 typical_responses = [textbox, submit]
+
+        #                 def update_response_options(buttons, button_dict):
+        #                     return [gr.Textbox(visible=len(buttons)==0, scale=4), gr.Button(visible=len(buttons)==0, scale=2)] + [gr.Button(visible=(x in buttons)) for x in button_dict.keys()]
+
+        #                 button_options = gr.State([])
+        #                 button_dict = gr.State({
+        #                     "revise lyrics": "Can we revise the lyrics?",
+        #                     "generate audio snippet": "Can you generate an audio snippet?",
+        #                     "continue revising" : "Can we continue revising this section?",
+        #                     "generate audio snippet with new lyrics": "Can you generate an audio snippet with these new lyrics?",
+        #                     "return to original instrumental": "Can you use the original clip for this section instead?",
+        #                     "revise genre": "Can we revise the instrumental tags?",
+        #                     "re-revise genre": "Can we revise the instrumental tags?",
+        #                     "continue to next section": "Looks good! Let's move on to the next section.",
+        #                     "merge snippets": "Can you merge this snippet into its full song?"
+        #                 })
+
+        #                 for button in button_dict.value.keys():
+        #                     btn = gr.Button(button, visible=(button in button_options.value))
+        #                     typical_responses.append(btn)
+
+
+        #         with gr.Column(elem_id="audio-group", scale=1) as audio_group_column:
+        #             # songwriter_creativity = gr.Slider(label="Songwriter LLM Temperature", minimum=0, maximum=1, step=0.01, value=1)
+
+        #             with gr.Group():
+        #                 # loop thru all audio in audio_clips
+        #                 gr.Markdown("""<center><font size=4>All Generations</font></center>""")
+
+        #                 @gr.render(inputs=generated_audios, triggers=[demo.load, generated_audios.change, textbox.submit, submit.click] + [btn.click for btn in typical_responses[2:]])
+        #                 def render_audio_group(generated_audios):
+        #                     # audio_group = gr.Group()
+        #                     for audio in generated_audios:
+        #                         clip_path, lyrics, instrumental, title, status = audio
+        #                         with gr.Accordion(title, open=False):
+        #                             if status == 'complete':
+        #                                 gr.Audio(value=clip_path, label=title, interactive=False, show_label=False, waveform_options={"show_controls": False})
+        #                             else:
+        #                                 gr.HTML(f'<audio controls><source src="{clip_path}" type="audio/mp3"></audio>')
+        #                             gr.TextArea(label="Lyrics", value=lyrics, interactive=False, show_label=False)
+        #                             gr.TextArea(label="Instrumental", value=instrumental, interactive=False, show_label=False, max_lines=1)
+
+        #                 gr.Markdown("""<center><font size=4>Current Generation</font></center>""")
+        #                 current_section = gr.Textbox(label="Current section", value="Verse 1", interactive=False, show_label=True)
+        #                 current_lyrics = gr.Textbox(label="Lyrics", value="", interactive=True, show_label=True)
+        #                 with gr.Row():
+        #                     curr_tags = gr.Textbox(label="Instrumental Tags", value="", interactive=True, show_label=True)
+        #                     # @gr.render(inputs=generated_audios, triggers=[demo.load])
+        #                     # def render_clip_to_continue(generated_audios):
+        #                     audio_clips = [x[3] for x in generated_audios.value]
+        #                     clip_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=audio_clips+[""], interactive=True)
+        #                     #clip_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=audio_clips+[""], interactive=True)
+        #                     songwriter_style = gr.Dropdown(label='Songwriter Style', value= "GPT 4o", choices=["GPT 4o", "d4vd"], interactive=True)
+        #                 with gr.Row():
+        #                     #curr_audio = gr.State("")
+        #                     curr_audio = gr.HTML(label="Generated section")
+        #                     regen = gr.Button("Re-generate")
+
+
+        #     section_meanings = gr.State(value="")
+        #     approve_button.click(update_song_details, inputs=[instrumental_output], outputs=[genre_input, title_input, blurb_input]).then(get_sections, inputs=[blurb_input, instrumental_output], outputs=[section_meanings])
+        #     continue_btn.click(get_starting_messages, inputs=[instrumental_textbox, title_input, blurb_input, genre_input, section_meanings], outputs=[messages, chatbot_history])
+
+        #     def set_response_buttons(button_dict, button_name):
+        #         print(button_name)
+        #         return button_dict[button_name]
+
+
+        #     with gr.Row(visible=True) as chat_row_0:
+        #         textbox_0 = gr.Textbox(lines=1, label='Send a message', show_label=False, placeholder='Send a message', scale=4)
+        #         submit_0 = gr.Button("Send", scale=2)
+
+        #     for btn in typical_responses[2:]:
+        #         btn.click(set_response_buttons, inputs=[button_dict, btn], outputs=[textbox]).then(model_chat,
+        #             inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
+        #             outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox], outputs=[textbox]).then(
+        #             update_response_options, [button_options, button_dict], typical_responses
+        #         )
+
+
+
+        #     with Modal(visible=False) as modal_0:
+        #         gr.Markdown("Welcome to the AI songwriter! The AI songwriter will help you write a song. You can chat with the AI, generate lyrics, and listen to audio snippets. Let's start by chatting with the AI.")
+        #     with Modal(visible=False) as modal:
+        #         gr.Markdown("The chatbot is run by an AI songwriter. It can respond to your conversations, generate lyrics and audio, and edit prior generations.\n\nNow, continue and respond to this second question from the AI songwriter.")
+        #     with Modal(visible=False) as modal_1:
+        #         gr.Markdown("The AI songwriter has now proposed a first verse! You now have the option to hear an audio snippet, revise the lyrics, or continue to the next section. The latter two options continue the conversation, whereas the first starts audio generation models. Select the 'get audio snippet' button to continue to the next step.")
+        #     with Modal(visible=False) as modal_2:
+        #         gr.Markdown("Awesome! You generated your first audio snippet./n/n As you work thru each section, the generated snippets are populated on the right panel. You'll be able to listen thru snippets as you work thru the song. \n\n "
+        #                     "The current section is also displayed on the right panel. You'll be able to revise sections via the chat or directly via the right panel. \n\n "
+        #                     "You're ready to start your official song! Hit the 'Start' button to start.")
+        #         start_button = gr.Button("Start")
+
+        #     # start_button.click(change_tab, gr.Number(2, visible=False), tabs).then(update_generation_tab,
+        #     #     inputs=[],
+        #     #     outputs=[])
+        #     continue_btn.click(lambda: Modal(visible=True), None, modal_0)
+
+        #     def make_modal_visible(step_number):
+        #         new_step_number = step_number + 1 if step_number in [0, 1, 2] else step_number
+        #         modals = [Modal(visible=i == step_number) for i in range(3)]
+        #         return new_step_number, *modals
+
+
+
+
+        #     submit_0.click(update_textbox, [textbox_0, tutorial_step], [textbox_0]).then(model_chat,
+        #         inputs=[genre_input, textbox_0, chatbot_history, messages, generated_audios],
+        #         outputs=[textbox_0, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox_0], outputs=[textbox_0]).then(
+        #         update_response_options, [button_options, button_dict], typical_responses
+        #     ).then(
+        #         make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
+        #     )
+        #     textbox_0.submit(update_textbox, [textbox_0, tutorial_step], [textbox_0]).then(model_chat,
+        #         inputs=[genre_input, textbox_0, chatbot_history, messages, generated_audios],
+        #         outputs=[textbox_0, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox_0], outputs=[textbox_0]).then(
+        #         update_response_options, [button_options, button_dict], typical_responses
+        #     ).then(
+        #         make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
+        #     )
+
+
         with gr.TabItem("Generation", id=1): #index is 1
             start_song_gen = gr.State(value=False)
-            gr.Markdown("""<center><font size=4>Now, chat with an AI songwriter to make your song!
-
-
+            gr.Markdown("""<center><font size=4>Now, chat with an AI songwriter to make your song! Hit finish when ready to hear full song.</font></center>""")
+
             character = gr.State(value="A 18-year old boy who dreams of being a pop star that uplifts people going through the difficulties of life")
 
             starting_messages, starting_history = get_starting_messages("", "Home", "Missing home", "Ballad", instrumental_textbox.value)
-
+            print(starting_history, "STARTING HISTORY")
             messages = gr.State(value=starting_messages)
+            # messages += [{"role": "assistant", "content": "You are a songwriter. You write songs."}]
             # journal_messages = gr.State(value=[journal_starting_message])
             # journal_response = gr.State(value="")
 
+            generated_audios = gr.State(value=[])
+            tutorial_step = gr.Number(value=0, visible=False)
+
             with gr.Row():
-
-
-
-
+                with gr.Column(scale=2):
+                    chatbot_history = gr.Chatbot(type="messages", value=starting_history, label='SongChat', placeholder=None, layout='bubble', bubble_full_width=False, height=500)
+                    with gr.Row():
+                        typical_responses = [textbox, submit]
+
+                        def update_response_options(buttons, button_dict):
+                            return [gr.Textbox(visible=len(buttons)==0, scale=4), gr.Button(visible=len(buttons)==0, scale=2)] + [gr.Button(visible=(x in buttons)) for x in button_dict.keys()]
 
+                        button_options = gr.State([])
+                        button_dict = gr.State({
+                            "revise lyrics": "Can we revise the lyrics?",
+                            "generate audio snippet": "Can you generate an audio snippet?",
+                            "continue revising" : "Can we continue revising this section?",
+                            "generate audio snippet with new lyrics": "Can you generate an audio snippet with these new lyrics?",
+                            "return to original instrumental": "Can you use the original clip for this section instead?",
+                            "revise genre": "Can we revise the instrumental tags?",
+                            "re-revise genre": "Can we revise the instrumental tags?",
+                            "continue to next section": "Looks good! Let's move on to the next section.",
+                            "merge snippets": "Can you merge this snippet into its full song?"
+                        })
+
+                        for button in button_dict.value.keys():
+                            btn = gr.Button(button, visible=(button in button_options.value))
+                            typical_responses.append(btn)
+
+
+                with gr.Column(elem_id="audio-group", scale=1):
+                    # songwriter_creativity = gr.Slider(label="Songwriter LLM Temperature", minimum=0, maximum=1, step=0.01, value=1)
+
+                    with gr.Group():
+                        # loop thru all audio in audio_clips
+                        gr.Markdown("""<center><font size=4>All Generations</font></center>""")
+
+                        @gr.render(inputs=generated_audios, triggers=[demo.load, generated_audios.change, textbox.submit, submit.click] + [btn.click for btn in typical_responses[2:]])
+                        def render_audio_group(generated_audios):
+                            # audio_group = gr.Group()
+                            for audio in generated_audios:
+                                clip_path, lyrics, instrumental, title, status = audio
+                                with gr.Accordion(title, open=False):
+                                    if status == 'complete':
+                                        gr.Audio(value=clip_path, label=title, interactive=False, show_label=False, waveform_options={"show_controls": False})
+                                    else:
+                                        gr.HTML(f'<audio controls><source src="{clip_path}" type="audio/mp3"></audio>')
+                                    gr.TextArea(label="Lyrics", value=lyrics, interactive=False, show_label=False)
+                                    gr.TextArea(label="Instrumental", value=instrumental, interactive=False, show_label=False, max_lines=1)
+
+                        gr.Markdown("""<center><font size=4>Current Generation</font></center>""")
+                        current_section = gr.Textbox(label="Current section", value="Verse 1", interactive=False, show_label=True)
+                        current_lyrics = gr.Textbox(label="Lyrics", value="", interactive=True, show_label=True)
+                        with gr.Row():
+                            curr_tags = gr.Textbox(label="Instrumental Tags", value="", interactive=True, show_label=True)
+                            # @gr.render(inputs=generated_audios, triggers=[demo.load])
+                            # def render_clip_to_continue(generated_audios):
+                            audio_clips = [x[3] for x in generated_audios.value]
+                            clip_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=audio_clips+[""], interactive=True)
+                            #clip_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=audio_clips+[""], interactive=True)
+                            songwriter_style = gr.Dropdown(label='Songwriter Style', value= "GPT 4o", choices=["GPT 4o", "d4vd"], interactive=True)
+                        with gr.Row():
+                            #curr_audio = gr.State("")
+                            curr_audio = gr.HTML(label="Generated section")
+                            regen = gr.Button("Re-generate")
+
+
             section_meanings = gr.State(value="")
             approve_button.click(update_song_details, inputs=[instrumental_output], outputs=[genre_input, title_input, blurb_input]).then(get_sections, inputs=[blurb_input, instrumental_output], outputs=[section_meanings])
             continue_btn.click(get_starting_messages, inputs=[instrumental_textbox, title_input, blurb_input, genre_input, section_meanings], outputs=[messages, chatbot_history])
+
+            with Modal(visible=False) as modal_0:
+                gr.Markdown("Welcome to the AI songwriter! The AI songwriter will help you write a song. You can chat with the AI, generate lyrics, and listen to audio snippets. Let's start by chatting with the AI.")
+            with Modal(visible=False) as modal:
+                gr.Markdown("The chatbot is run by an AI songwriter. It can respond to your conversations, generate lyrics and audio, and edit prior generations.\n\nNow, continue and respond to this second question from the AI songwriter.")
+            with Modal(visible=False) as modal_1:
+                gr.Markdown("The AI songwriter has now proposed a first verse! You now have the option to hear an audio snippet, revise the lyrics, or continue to the next section. The latter two options continue the conversation, whereas the first starts audio generation models. Select the 'get audio snippet' button to continue to the next step.")
+            with Modal(visible=False) as modal_2:
+                gr.Markdown("Awesome! You generated your first audio snippet./n/n As you work thru each section, the generated snippets are populated on the right panel. You'll be able to listen thru snippets as you work thru the song. \n\n "
+                            "The current section is also displayed on the right panel. You'll be able to revise sections via the chat or directly via the right panel. \n\n "
+                            "You're ready to start your official song! Hit the 'Start' button to start.")
+                start_button = gr.Button("Start")
+
+            continue_btn.click(lambda: Modal(visible=True), None, modal_0)
+            start_button.click(lambda: Modal(visible=False), None, modal_2)
+
+            def make_modal_visible(step_number):
+                new_step_number = step_number + 1 if step_number in [0, 1, 2] else step_number
+                modals = [Modal(visible=i == step_number) for i in range(3)]
+                return new_step_number, *modals
+
+            def update_textbox(textbox, step_number):
+                if step_number == 0:
+                    return textbox + "\nAsk me another question to inform the verse"
+                elif step_number == 1:
+                    return textbox + "\nUse this info to write a verse"
+                else:
+                    return textbox
+
+            def set_response_buttons(button_dict, button_name):
+                print(button_name)
+                return button_dict[button_name]
+
+            def set_regenerate_query(textbox, current_section, current_lyrics, curr_tags, clip_to_continue):
+                return f"Can you revise this section so it uses these lyrics and instrumentals and then generate an audio snippet using it?\nLyrics:\n{current_lyrics}Instrumental tags: {curr_tags}\nClip to continue: {clip_to_continue}"
+            def set_snippet_query(textbox):
+                return "Can I have an audio snippet of what we have now?"
+            def set_finish_query(textbox):
+                return "I'm ready for the full song now! Can you finish it up?"
+            def reset_textbox(textbox):
+                return ""
 
             with gr.Row():
-                textbox
-                submit
+                textbox.render()
+                submit.render()
+
+            for btn in typical_responses[2:]:
+                btn.click(set_response_buttons, inputs=[button_dict, btn], outputs=[textbox]).then(model_chat,
+                    inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
+                    outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox], outputs=[textbox]).then(
+                    update_response_options, [button_options, button_dict], typical_responses
+                ).then(
+                    make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
+                )
+
+
+
+
+            submit.click(update_textbox, [textbox, tutorial_step], [textbox]).then(model_chat,
+                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
+                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox], outputs=[textbox]).then(
+                update_response_options, [button_options, button_dict], typical_responses
+            ).then(
+                make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
+            )
+            textbox.submit(update_textbox, [textbox, tutorial_step], [textbox]).then(model_chat,
+                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
+                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox], outputs=[textbox]).then(
+                update_response_options, [button_options, button_dict], typical_responses
+            ).then(
+                make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
+            )
+
+
+            regen.click(set_regenerate_query, inputs=[textbox, current_section, current_lyrics, curr_tags, clip_to_continue], outputs=[textbox]).then(model_chat,
+                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
+                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox], outputs=[textbox]).then(
+                update_response_options, [button_options, button_dict], typical_responses
+            ).then(
+                make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
+            )
 
             with gr.Row():
-                get_snippet_button = gr.Button("Get Audio Snippet", scale=2)
-                done = gr.Button("Finish Full Song 🎶", scale=
+                # get_snippet_button = gr.Button("Get Audio Snippet", scale=2)
+                done = gr.Button("Finish Full Song 🎶", scale=4)
                 #autoGPT_checkbox = gr.Checkbox(label="AutoGPT", value=True, info="Auto-generate responses from journal entry", interactive=True, scale=2)
                 #journal_llm_creativity = gr.Slider(label="Journal LLM Temperature", minimum=0, maximum=1, step=0.01, value=1, interactive=True, scale=2)
                 reset_button = gr.Button("Reset", scale=2)
@@ -104,57 +414,39 @@ with gr.Blocks() as demo:
            def reset_chat(messages, chatbot_history):
                messages = messages[:2]
                chatbot_history = messages_to_history(messages[:2])
-                return messages, chatbot_history, ''
+                return messages, chatbot_history, '', '', '', '', gr.HTML('<center>generating...</center>'), [], []
 
-            reset_button.click(reset_chat, inputs=[messages, chatbot_history], outputs=[messages, chatbot_history,
+            reset_button.click(reset_chat, inputs=[messages, chatbot_history], outputs=[messages, chatbot_history, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options])
 
 
-
-
-
-            # download_btn = gr.Button("Download Conversation")
-
-
-
-
-
-            # with open(f'conversation_{now}.json', 'w') as f:
-            #     json.dump(messages, f)
+            done.click(set_finish_query, inputs=[textbox], outputs=[textbox]).then(model_chat,
+                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
+                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options])
+
+            demo.load(reset_chat, inputs=[messages, chatbot_history], outputs=[messages, chatbot_history, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios])
+
+
+            # with gr.Row():
+            #     song_link = gr.State(value="")
+            #     song = gr.HTML()
 
 
-
-
+            def download_conversation(messages):
+                with open(f'data/conversation_history.json', 'w') as f:
+                    json.dump(messages, f)
+
+
+            with gr.Accordion("Admin", open=False):
+                download_btn = gr.Button("Download Conversation")
+                download_btn.click(download_conversation, [messages], None)
            # story_textbox = gr.TextArea(label="Story to provide context to songwriter", value="", max_lines=3)
 
-            def reset_textbox(textbox):
-                return ""
-            def set_snippet_query(textbox):
-                return "Can I have an audio snippet of what we have now?"
-            def set_finish_query(textbox):
-                return "I'm ready for the full song now! Can you finish it up?"
-            def set_lyrics_song_displays(messages):
-                final_message = messages[-1]['content']
-                final_lyrics = final_message.split("Final Lyrics:")[1].split("Final song:")[0].strip("\n ")
-                song = final_message.split("Final song:")[1].strip("\n ")
-                return final_lyrics, song
-
-            submit.click(model_chat,
-                inputs=[genre_input, textbox, chatbot_history, messages],
-                outputs=[textbox, chatbot_history, messages, lyrics_display]).then(reset_textbox, inputs=[textbox], outputs=[textbox])
-            textbox.submit(model_chat,
-                inputs=[genre_input, textbox, chatbot_history, messages],
-                outputs=[textbox, chatbot_history, messages, lyrics_display]).then(reset_textbox, inputs=[textbox], outputs=[textbox])
-
-            get_snippet_button.click(set_snippet_query, inputs=[textbox], outputs=[textbox]).then(model_chat,
-                inputs=[genre_input, textbox, chatbot_history, messages],
-                outputs=[textbox, chatbot_history, messages]).then(reset_textbox, inputs=[textbox], outputs=[textbox])
-
-            done.click(set_finish_query, inputs=[textbox], outputs=[textbox]).then(model_chat,
-                inputs=[genre_input, textbox, chatbot_history, messages],
-                outputs=[textbox, chatbot_history, messages, lyrics_display]).then(
-                set_lyrics_song_displays, inputs=[messages], outputs=[lyrics_display, song]).then(reset_textbox, inputs=[textbox], outputs=[textbox])
+
+            # get_snippet_button.click(set_snippet_query, inputs=[textbox], outputs=[textbox]).then(model_chat,
+            #     inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
+            #     outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios]).then(reset_textbox, inputs=[textbox], outputs=[textbox])
 
 
 demo.queue(api_open=False)
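Note: the rewritten app.py leans on two Gradio patterns worth calling out: components created before the Blocks context and placed later with .render(), and event chains where each .then() step runs after the previous one finishes. A minimal self-contained sketch of both, assuming a Gradio 4.x environment; the component and function names here are illustrative, not the app's own:

import gradio as gr

def respond(message, history):
    # Placeholder chat logic standing in for model_chat.
    return history + [(message, f"echo: {message}")]

def clear_box():
    return ""

box = gr.Textbox(placeholder="Send a message")  # created before the Blocks context

with gr.Blocks() as demo:
    chat = gr.Chatbot()
    box.render()  # placed here, like textbox.render()/submit.render() above
    # .then() chains a follow-up step after the first completes, mirroring
    # the model_chat -> reset_textbox chain in app.py.
    box.submit(respond, inputs=[box, chat], outputs=[chat]).then(
        clear_box, outputs=[box]
    )

demo.launch()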
chat.py
CHANGED
@@ -2,8 +2,17 @@ from typing import List, Optional, Tuple, Dict
|
|
2 |
import os
|
3 |
import json
|
4 |
from openai import OpenAI
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
|
|
|
|
|
|
7 |
from gpt_calls import AI_Songwriter
|
8 |
from utils.song_utils import messages_to_history
|
9 |
|
@@ -11,17 +20,21 @@ History = List[Tuple[str, str]] # a type: pairs of (query, response), where quer
|
|
11 |
Messages = List[Dict[str, str]] # a type: list of messages with role and content
|
12 |
|
13 |
client_key = os.getenv("OPEN_AI_KEY")
|
|
|
14 |
oai_client = OpenAI(
|
15 |
api_key=client_key,
|
16 |
)
|
17 |
|
18 |
-
def
|
19 |
-
|
20 |
-
|
|
|
|
|
21 |
|
22 |
-
if not query.endswith('?'):
|
23 |
-
query += " Use write_section when you have a large amount of story to pull from to write the next section! Alternatively ask me a follow up before moving to write."
|
24 |
|
|
|
|
|
|
|
25 |
with open('ai_tools.json') as f:
|
26 |
ai_tools = json.load(f)
|
27 |
|
@@ -32,14 +45,13 @@ def model_chat(genre_input, query: Optional[str], history: Optional[History], me
|
|
32 |
else:
|
33 |
messages = messages + [{'role': 'user', 'content': query}]
|
34 |
|
35 |
-
|
36 |
|
37 |
messages_filtered = messages
|
38 |
response_message = oai_client.chat.completions.create(
|
39 |
model="gpt-4o",
|
40 |
messages=messages_filtered,
|
41 |
tools = ai_tools,
|
42 |
-
tool_choice="
|
43 |
)
|
44 |
print(response_message, "model chat response")
|
45 |
current_response = ""
|
@@ -52,18 +64,124 @@ def model_chat(genre_input, query: Optional[str], history: Optional[History], me
|
|
52 |
"tool_calls": tool_calls,
|
53 |
"function_call": response_message.choices[0].message.function_call
|
54 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
# If true the model will return the name of the tool / function to call and the argument(s)
|
56 |
for tool_call in tool_calls:
|
57 |
print(tool_call)
|
58 |
tool_call_id = tool_call.id
|
59 |
tool_function_name = tool_call.function.name
|
60 |
tool_query_args = eval(tool_call.function.arguments)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
if tool_function_name == 'write_section':
|
64 |
snippet_instrumental_tags = tool_query_args.pop('snippet_instrumental_tags', None)
|
65 |
snippet_clip_to_continue_from = tool_query_args.pop('snippet_clip_to_continue_from', None)
|
66 |
suggested_lyrics = songwriterAssistant.write_section(**tool_query_args)
|
|
|
67 |
|
68 |
## yield suggested lyrics in tool and assistant message
|
69 |
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': suggested_lyrics}
|
@@ -77,35 +195,36 @@ def model_chat(genre_input, query: Optional[str], history: Optional[History], me
|
|
77 |
current_response = model_response_with_function_call.choices[0].message.content
|
78 |
|
79 |
role = "assistant"
|
80 |
-
new_messages = new_messages + [{'role': role, 'content': current_response}]
|
81 |
# new_messages = [msg for msg in new_messages if msg['content'] is not None and msg['role'] in ['user', 'assistant']]
|
82 |
history = messages_to_history(new_messages)
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
|
86 |
-
# new_history = messages_to_history(new_messages)
|
87 |
-
# yield '', new_history, new_messages, '[...]'
|
88 |
|
89 |
-
|
90 |
-
# song_link = make_song(suggested_lyrics, snippet_instrumental_tags, snippet_clip_to_continue)
|
91 |
|
92 |
-
|
93 |
-
# clip_id = song_link.split("https://audiopipe.suno.ai/?item_id=")[1]
|
94 |
|
95 |
-
# ## add song link to tool and audio message
|
96 |
-
# tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': suggested_lyrics + '\nclip id: ' + clip_id}
|
97 |
-
# audio_message = {'role': 'assistant', 'content': "Here's what I've come up with:\n" + suggested_lyrics + '\n\n' + f'<audio controls autoplay><source src="{song_link}" type="audio/mp3"></audio><p>clip id: {clip_id}</p><p>instrumental tags: {snippet_instrumental_tags}</p>'}
|
98 |
-
# audio_message['content'] += f'<p>continued from clip: {snippet_clip_to_continue}</p>'
|
99 |
-
# audio_message['content'] += f'\n\nWhat do you think?'
|
100 |
-
# new_messages = messages + [tool_message, audio_message]
|
101 |
-
# new_history = messages_to_history(new_messages)
|
102 |
-
# yield '', new_history, new_messages, '[...]'
|
103 |
|
104 |
elif tool_function_name == 'revise_section_lyrics':
|
105 |
-
snippet_instrumental_tags = tool_query_args.pop('snippet_instrumental_tags', None)
|
106 |
-
snippet_clip_to_continue_from = tool_query_args.pop('snippet_clip_to_continue_from', None)
|
107 |
revised_lyrics = songwriterAssistant.revise_section_lyrics(**tool_query_args)
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
# ## yield revised lyrics in tool and assistant message
|
110 |
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': revised_lyrics}
|
111 |
# audio_message = {'role': 'assistant', 'content': "Here's my revised lyrics:\n" + revised_lyrics + "\n\nGenerating audio snippet..."}
|
@@ -117,45 +236,36 @@ def model_chat(genre_input, query: Optional[str], history: Optional[History], me
|
|
117 |
) # get a new response from the model where it can see the function response
|
118 |
current_response = model_response_with_function_call.choices[0].message.content
|
119 |
|
|
|
|
|
120 |
role = "assistant"
|
121 |
-
new_messages = new_messages + [{'role': role, 'content': current_response}]
|
122 |
# new_messages = [msg for msg in new_messages if msg['content'] is not None and msg['role'] in ['user', 'assistant']]
|
123 |
history = messages_to_history(new_messages)
|
124 |
-
|
125 |
-
|
126 |
-
# yield '', new_history, new_messages, '[...]'
|
127 |
-
|
128 |
-
# ### call make_song here with the snippet_lyrics, snippet_instrumental_tags, and snippet_clip_to_continue
|
129 |
-
# song_link = make_song(revised_lyrics, snippet_instrumental_tags, snippet_clip_to_continue)
|
130 |
|
131 |
-
|
132 |
-
# clip_id = song_link.split("https://audiopipe.suno.ai/?item_id=")[1]
|
133 |
|
134 |
-
# ## add song link to tool and audio message
|
135 |
-
# tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': revised_lyrics + '\nclip id: ' + clip_id}
|
136 |
-
# audio_message = {'role': 'assistant', 'content': "Here's what I've come up with:\n" + revised_lyrics + '\n\n' + f'<audio controls autoplay><source src="{song_link}" type="audio/mp3"></audio><p>clip id: {clip_id}</p><p>instrumental tags: {snippet_instrumental_tags}</p>'}
|
137 |
-
# audio_message['content'] += f'<p>continued from clip: {snippet_clip_to_continue}</p>'
|
138 |
-
# audio_message['content'] += f'\n\nWhat do you think?'
|
139 |
-
# new_messages = messages + [tool_message, audio_message]
|
140 |
-
# new_history = messages_to_history(new_messages)
|
141 |
-
# yield '', new_history, new_messages, '[...]'
|
142 |
|
143 |
elif tool_function_name == 'revise_instrumental_tags':
|
144 |
#detangle tool_query_args dict
|
145 |
#snippet_lyrics = tool_query_args['snippet_lyrics'] + "\n[End]"
|
146 |
-
snippet_instrumental_tags = tool_query_args
|
147 |
-
user_instrumental_feedback = tool_query_args
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
# if 'snippet_clip_to_continue_from' not in tool_query_args:
|
149 |
# tool_query_args['snippet_clip_to_continue_from'] = None
|
150 |
# snippet_clip_to_continue_from = tool_query_args['snippet_clip_to_continue_from']
|
151 |
|
152 |
new_instrumental_tags = songwriterAssistant.revise_instrumental_tags(snippet_instrumental_tags, user_instrumental_feedback)
|
153 |
-
# yield new_instrumental_tags in tool and assistant message
|
154 |
-
# tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': f'new instrumental tags: {new_instrumental_tags}'}
|
155 |
-
# audio_message = {'role': 'assistant', 'content': f'Sure! I\'ve revised the instrumental tags: {new_instrumental_tags}\n\n Generating audio snippet...'}
|
156 |
-
# new_messages = messages + [tool_message, audio_message]
|
157 |
-
# new_history = messages_to_history(new_messages)
|
158 |
-
# yield '', new_history, new_messages, '[...]'
|
159 |
|
160 |
if isinstance(tool_query_args['sections_written'], str):
|
161 |
current_lyrics = tool_query_args['sections_written']
|
@@ -164,28 +274,116 @@ def model_chat(genre_input, query: Optional[str], history: Optional[History], me
|
|
164 |
else:
|
165 |
current_lyrics = ""
|
166 |
|
|
|
|
|
|
|
167 |
#current_lyrics = "\n".join(tool_query_args['sections_written'])
|
168 |
song_link = make_song(current_lyrics, new_instrumental_tags)
|
169 |
## filter out suno link from tool query arg
|
|
|
|
|
|
|
|
|
|
|
170 |
clip_id = song_link.split("https://audiopipe.suno.ai/?item_id=")[1]
|
171 |
|
172 |
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': f'new instrumental tags: {new_instrumental_tags}, clip id: {clip_id}'}
|
173 |
-
audio_message = {'role': 'assistant', 'content': f'Sure! I\'ve revised the instrumental tags: {new_instrumental_tags}\nCurrent lyrics: {current_lyrics}\n\n <audio controls autoplay><source src="{song_link}" type="audio/mp3"></audio
|
174 |
audio_message['content'] += f'\n\nWhat do you think?'
|
175 |
new_messages = messages + [tool_message, audio_message]
|
176 |
new_history = messages_to_history(new_messages)
|
177 |
-
yield '', new_history, new_messages, '[...]'
|
178 |
-
elif tool_function_name == 'merge_all_snippets':
|
179 |
-
updated_clip_url, updated_lyrics, clips_list = concat_snippets(tool_query_args['last_snippet_id'])
|
180 |
-
updated_clip_id = updated_clip_url.split("https://audiopipe.suno.ai/?item_id=")[1]
|
181 |
|
182 |
-
#
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
185 |
|
186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
new_history = messages_to_history(new_messages)
|
188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
elif tool_function_name == 'finish_full_song':
|
190 |
## args are sections_to_be_written, relevant_ideas, last_snippet_id, sni
|
191 |
|
@@ -203,7 +401,10 @@ def model_chat(genre_input, query: Optional[str], history: Optional[History], me
|
|
203 |
## STEP 1: WRITE ALL LYRICS using songwriterAssistant
|
204 |
remaining_lyrics = songwriterAssistant.write_all_lyrics(**tool_query_args)
|
205 |
full_lyrics = current_lyrics + remaining_lyrics + "\n[End]"
|
206 |
-
|
207 |
|
208 |
## STEP 2: MAKE SONG FOR REMAINING LYRICS
|
209 |
song_link = make_song(remaining_lyrics, snippet_instrumental_tags, snippet_clip_to_continue_from)
|
@@ -214,11 +415,15 @@ def model_chat(genre_input, query: Optional[str], history: Optional[History], me
|
|
214 |
|
215 |
new_messages = messages + [tool_message, audio_message]
|
216 |
new_history = messages_to_history(new_messages)
|
217 |
-
|
218 |
|
219 |
## STEP 3: MERGE FULL SONG
|
220 |
if snippet_clip_to_continue_from not in [None, ""]:
|
221 |
-
updated_clip_url, updated_lyrics, clips_list = concat_snippets(song_link.split("https://audiopipe.suno.ai/?item_id=")[1])
|
222 |
else:
|
223 |
updated_clip_url, updated_lyrics, clips_list = song_link, remaining_lyrics, []
|
224 |
## YIELD UPDATED CLIP URL, LYRICS, AND CLIPS LIST
|
@@ -226,42 +431,82 @@ def model_chat(genre_input, query: Optional[str], history: Optional[History], me
|
|
226 |
|
227 |
#tool and assistant message
|
228 |
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': f'updated clip id: {updated_clip_id}\nupdated lyrics: {updated_lyrics}\nupdated clips path: {clips_list}'}
|
229 |
-
audio_message = {'role': 'assistant', 'content': f'All done! Thank you for participating :) \nFinal Lyrics: {full_lyrics} \nFinal song: <audio controls autoplay><source src="{updated_clip_url}" type="audio/mp3"></audio>'}
|
230 |
|
231 |
new_messages = messages + [tool_message, audio_message]
|
232 |
new_history = messages_to_history(new_messages)
|
233 |
-
|
234 |
|
235 |
elif tool_function_name == 'get_audio_snippet':
|
236 |
#detangle tool_query_args dict
|
237 |
snippet_lyrics = tool_query_args['snippet_lyrics'] + "\n[End]"
|
238 |
snippet_instrumental_tags = tool_query_args['snippet_instrumental_tags']
|
239 |
-
|
240 |
-
|
241 |
-
snippet_clip_to_continue_from = tool_query_args['snippet_clip_to_continue_from']
|
242 |
song_link = make_song(snippet_lyrics, snippet_instrumental_tags, snippet_clip_to_continue_from)
|
243 |
## filter out suno link from tool query arg
|
244 |
clip_id = song_link.split("https://audiopipe.suno.ai/?item_id=")[1]
|
245 |
|
246 |
-
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': f'snippet lyrics: {snippet_lyrics}\ninstrumental tags: {tool_query_args["snippet_instrumental_tags"]}, clip id: {clip_id}'}
|
247 |
-
audio_message_content = "Here's what I've come up with:\n" + snippet_lyrics + '\n\n' + f'<audio controls autoplay><source src="{song_link}" type="audio/mp3"></audio><p>instrumental tags: {tool_query_args["snippet_instrumental_tags"]}</p
|
248 |
audio_message_content += f'<p>continued from clip: {snippet_clip_to_continue_from}</p>'
|
249 |
audio_message = {'role': 'assistant', 'content': audio_message_content}
|
250 |
new_messages = messages + [tool_message, audio_message]
|
251 |
new_history = messages_to_history(new_messages)
|
252 |
-
|
253 |
else:
|
254 |
print(f"Error: function {tool_function_name} does not exist")
|
255 |
-
|
256 |
-
# messages.append({
|
257 |
-
# "role":"tool",
|
258 |
-
# "tool_call_id":tool_call_id,
|
259 |
-
# "name": tool_function_name,
|
260 |
-
# "content":results
|
261 |
-
# })
|
262 |
-
|
263 |
-
# Step 4: Invoke the chat completions API with the function response appended to the messages list
|
264 |
-
# Note that messages with role 'tool' must be a response to a preceding message with 'tool_calls'
|
265 |
|
266 |
else:
|
267 |
# Model did not identify a function to call, result can be returned to the user
|
|
|
2 |
import os
|
3 |
import json
|
4 |
from openai import OpenAI
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
import asyncio
|
7 |
+
import regex as re
|
8 |
+
from gradio_modal import Modal
|
9 |
+
import gradio as gr
|
10 |
+
import time
|
11 |
|
12 |
+
# Load environment variables from .env file
|
13 |
+
load_dotenv()
|
14 |
+
|
15 |
+
from suno import make_song, concat_snippets, update_song_links
|
16 |
from gpt_calls import AI_Songwriter
|
17 |
from utils.song_utils import messages_to_history
|
18 |
|
|
|
20 |
Messages = List[Dict[str, str]] # a type: list of messages with role and content
|
21 |
|
22 |
client_key = os.getenv("OPEN_AI_KEY")
|
23 |
+
print(client_key)
|
24 |
oai_client = OpenAI(
|
25 |
api_key=client_key,
|
26 |
)
|
27 |
|
28 |
+
def determine_title(section_name, generated_audios):
|
29 |
+
count = sum(1 for audio in generated_audios if audio[3].startswith(section_name))  # titles live at index 3 of each tuple
|
30 |
+
if count > 0:
|
31 |
+
section_name = f"{section_name} {count + 1}"
|
32 |
+
return section_name
|
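# Assumed entry schema for generated_audios throughout this module, inferred from the
# append sites below: (clip_url, lyrics, instrumental_tags, title, status). Titles sit
# at index 3, which is why the count above matches on audio[3].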
33 |
|
|
|
|
|
34 |
|
35 |
+
def model_chat(genre_input, query: Optional[str], history: Optional[History], messages: Optional[Messages], generated_audios: List[Tuple[str, str, str]], auto=False) -> Tuple[str, History, Messages, str, str, str, str, str, List]:
|
36 |
+
if query is None:
|
37 |
+
query = ''
|
38 |
with open('ai_tools.json') as f:
|
39 |
ai_tools = json.load(f)
|
40 |
|
|
|
45 |
else:
|
46 |
messages = messages + [{'role': 'user', 'content': query}]
|
47 |
|
|
|
48 |
|
49 |
messages_filtered = messages
|
50 |
response_message = oai_client.chat.completions.create(
|
51 |
model="gpt-4o",
|
52 |
messages=messages_filtered,
|
53 |
tools = ai_tools,
|
54 |
+
tool_choice="required",
|
55 |
)
|
56 |
print(response_message, "model chat response")
|
57 |
current_response = ""
|
|
|
64 |
"tool_calls": tool_calls,
|
65 |
"function_call": response_message.choices[0].message.function_call
|
66 |
})
|
67 |
+
|
68 |
+
if len(tool_calls) > 1:
|
69 |
+
for tool_call in tool_calls:
|
70 |
+
tool_message = {
|
71 |
+
'role': 'tool',
|
72 |
+
'tool_call_id': tool_call.id,
|
73 |
+
'name': tool_call.function.name,
|
74 |
+
'content': "You called two different functions when you can only call one at a time. Did you mean to call revise_section_lyrics_and_instrumental but instead had two different calls for lyrics and instrumental? Communicate this failure to the user and clarify what they are asking for, then only call one tool next time."
|
75 |
+
}
|
76 |
+
messages.append(tool_message)
|
77 |
+
|
78 |
+
# Generate a response using GPT-4o and add it as a message
|
79 |
+
model_response_with_function_call = oai_client.chat.completions.create(
|
80 |
+
model="gpt-4o",
|
81 |
+
messages=messages,
|
82 |
+
)
|
83 |
+
current_response = model_response_with_function_call.choices[0].message.content
|
84 |
+
|
85 |
+
role = "assistant"
|
86 |
+
messages.append({'role': role, 'content': current_response})
|
87 |
+
|
88 |
+
yield '', messages_to_history(messages), messages, '', '', '', '', None, generated_audios, []
|
89 |
+
return
|
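# Guard: the assistant is limited to one tool call per turn; when gpt-4o emits several,
# each call receives a corrective tool message and the turn is handed back to the model.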
90 |
+
|
91 |
+
|
92 |
# If true the model will return the name of the tool / function to call and the argument(s)
|
93 |
for tool_call in tool_calls:
|
94 |
print(tool_call)
|
95 |
tool_call_id = tool_call.id
|
96 |
tool_function_name = tool_call.function.name
|
97 |
tool_query_args = eval(tool_call.function.arguments)
|
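# Hedged aside: tool_call.function.arguments arrives as a JSON string, so json.loads(...)
# would be the safer parse here; eval happens to work for dict-shaped payloads but will
# execute arbitrary Python expressions.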
98 |
+
|
99 |
+
print(tool_function_name, tool_query_args)
|
100 |
+
|
101 |
+
with open('ai_tools.json') as f:
|
102 |
+
ai_tools = json.load(f)
|
103 |
+
|
104 |
+
for tool in ai_tools:
|
105 |
+
if tool['function']['name'] == tool_function_name:
|
106 |
+
valid_keys = tool['function']['parameters']['properties'].keys()
|
107 |
+
required_keys = tool['function']['parameters']['required']
|
108 |
+
break
|
109 |
+
|
110 |
+
print('query args before', tool_query_args)
|
111 |
+
tool_query_args = {k: v for k, v in tool_query_args.items() if k in valid_keys}
|
112 |
+
print('query args after', tool_query_args)
|
113 |
+
missing_keys = []
|
114 |
+
for key in required_keys:
|
115 |
+
if key not in tool_query_args:
|
116 |
+
missing_keys.append(key)
|
117 |
+
if len(missing_keys)>0:
|
118 |
+
missing_keys_str = ", ".join(missing_keys)
|
119 |
+
tool_message = {
|
120 |
+
'role': 'tool',
|
121 |
+
'tool_call_id': tool_call_id,
|
122 |
+
'name': tool_function_name,
|
123 |
+
'content': f"Sorry, the keys {missing_keys_str} from the function you called are missing, communicate this to the user and either get what these args should be or figure out which function to call."
|
124 |
+
}
|
125 |
+
|
126 |
+
new_messages = messages + [tool_message]
|
127 |
+
|
128 |
+
model_response_with_function_call = oai_client.chat.completions.create(
|
129 |
+
model="gpt-4o",
|
130 |
+
messages=new_messages,
|
131 |
+
) # get a new response from the model where it can see the function response
|
132 |
+
current_response = model_response_with_function_call.choices[0].message.content
|
133 |
+
|
134 |
+
role = "assistant"
|
135 |
+
new_messages = new_messages + [{'role': role, 'content': current_response}]
|
136 |
+
new_history = messages_to_history(new_messages)
|
137 |
+
|
138 |
+
generated_audios = update_song_links(generated_audios)
|
139 |
+
yield '', new_history, new_messages, '', '', '', '', None, generated_audios, []
|
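# The validation above checks the parsed arguments against the ai_tools.json schema:
# unknown keys are dropped, and missing required keys are reported back to the model as
# a tool message so it can re-ask the user instead of raising a KeyError mid-call.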
140 |
+
|
141 |
+
|
142 |
+
# Step 3: Call the function and retrieve results. Append the results to the messages list.
|
143 |
+
if tool_function_name == 'ask_question':
|
144 |
+
question = songwriterAssistant.ask_question(messages)
|
145 |
+
|
146 |
+
question = question.replace("ask question:", "").replace("ask question ", "").replace("ask question\n", "").replace("ask question", "")
|
147 |
+
|
148 |
+
## yield question in tool and assistant message
|
149 |
+
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': question}
|
150 |
+
|
151 |
+
new_messages = messages + [tool_message]
|
152 |
+
|
153 |
+
question_message = {'role': 'assistant', 'content': question}
|
154 |
+
new_messages = new_messages + [question_message]
|
155 |
+
new_history = messages_to_history(new_messages)
|
156 |
+
|
157 |
+
generated_audios = update_song_links(generated_audios)
|
158 |
+
yield '', new_history, new_messages, '', '', '', '', None, generated_audios, []
|
159 |
+
|
160 |
+
elif tool_function_name == 'clarify_arguments':
|
161 |
+
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': 'arguments to clarify: \n' + '\n'.join(tool_query_args['arguments_to_clarify'])}
|
162 |
+
|
163 |
+
new_messages = messages + [tool_message]
|
164 |
+
|
165 |
+
model_response_with_function_call = oai_client.chat.completions.create(
|
166 |
+
model="gpt-4o",
|
167 |
+
messages=new_messages,
|
168 |
+
) # get a new response from the model where it can see the function response
|
169 |
+
current_response = model_response_with_function_call.choices[0].message.content
|
170 |
+
|
171 |
+
role = "assistant"
|
172 |
+
new_messages = new_messages + [{'role': role, 'content': current_response}] # + "\n\nWould you like to get an audio snippet? Or continue writing?"}]
|
173 |
+
# new_messages = [msg for msg in new_messages if msg['content'] is not None and msg['role'] in ['user', 'assistant']]
|
174 |
+
new_history = messages_to_history(new_messages)
|
175 |
+
|
176 |
+
generated_audios = update_song_links(generated_audios)
|
177 |
+
yield '', new_history, new_messages, '', '', '', '', None, generated_audios, []
|
178 |
+
|
179 |
|
180 |
+
elif tool_function_name == 'write_section':
|
|
|
181 |
snippet_instrumental_tags = tool_query_args.pop('snippet_instrumental_tags', None)
|
182 |
snippet_clip_to_continue_from = tool_query_args.pop('snippet_clip_to_continue_from', None)
|
183 |
suggested_lyrics = songwriterAssistant.write_section(**tool_query_args)
|
184 |
+
suggested_lyrics = suggested_lyrics.strip('`*-\n')
|
185 |
|
186 |
## yield suggested lyrics in tool and assistant message
|
187 |
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': suggested_lyrics}
|
|
|
195 |
current_response = model_response_with_function_call.choices[0].message.content
|
196 |
|
197 |
role = "assistant"
|
198 |
+
new_messages = new_messages + [{'role': role, 'content': current_response}] # + "\n\nWould you like to get an audio snippet? Or continue writing?"}]
|
199 |
# new_messages = [msg for msg in new_messages if msg['content'] is not None and msg['role'] in ['user', 'assistant']]
|
200 |
history = messages_to_history(new_messages)
|
201 |
+
|
202 |
+
# current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio
|
203 |
+
buttons = ["revise lyrics", "generate audio snippet", "continue to next section"]
|
204 |
+
|
205 |
+
generated_audios = update_song_links(generated_audios)
|
206 |
+
clips_to_continue = gr.Dropdown(label='Clip to continue', value = snippet_clip_to_continue_from, choices=[x[3] for x in generated_audios]+[""], interactive=True)
|
207 |
|
208 |
|
|
|
|
|
209 |
|
210 |
+
yield '', history, new_messages, tool_query_args['section_name'], suggested_lyrics.split(':')[-1], snippet_instrumental_tags, clips_to_continue, None, generated_audios, buttons
|
|
|
211 |
|
212 |
+
### DO SOMETHING TO UPDATE CURRENT GENERATION for write_section
|
|
|
213 |
|
214 |
|
215 |
elif tool_function_name == 'revise_section_lyrics':
|
216 |
revised_lyrics = songwriterAssistant.revise_section_lyrics(**tool_query_args)
|
217 |
|
218 |
+
# if isinstance(revised_lyrics, list):
|
219 |
+
# revised_lyrics = '\n'.join(revised_lyrics)
|
220 |
+
if isinstance(revised_lyrics, str) and revised_lyrics.startswith("[") and revised_lyrics.endswith("]"):
|
221 |
+
try:
|
222 |
+
revised_lyrics = eval(revised_lyrics)
|
223 |
+
if isinstance(revised_lyrics, list):
|
224 |
+
revised_lyrics = '\n'.join(revised_lyrics)
|
225 |
+
except:
|
226 |
+
pass
|
227 |
+
|
228 |
# ## yield revised lyrics in tool and assistant message
|
229 |
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': revised_lyrics}
|
230 |
# audio_message = {'role': 'assistant', 'content': "Here's my revised lyrics:\n" + revised_lyrics + "\n\nGenerating audio snippet..."}
|
|
|
236 |
) # get a new response from the model where it can see the function response
|
237 |
current_response = model_response_with_function_call.choices[0].message.content
|
238 |
|
239 |
+
buttons = ["revise lyrics again", "generate audio snippet with new lyrics", "continue to next section"]
|
240 |
+
|
241 |
role = "assistant"
|
242 |
+
new_messages = new_messages + [{'role': role, 'content': current_response + "\n\nWould you like to get an audio snippet? Or continue writing?"}]
|
243 |
# new_messages = [msg for msg in new_messages if msg['content'] is not None and msg['role'] in ['user', 'assistant']]
|
244 |
history = messages_to_history(new_messages)
|
245 |
+
generated_audios = update_song_links(generated_audios)
|
246 |
+
clips_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=[x[3] for x in generated_audios]+[""], interactive=True)
|
247 |
|
248 |
+
yield '', history, new_messages, tool_query_args['section_name'], revised_lyrics, '', clips_to_continue, None, generated_audios, buttons
|
|
|
249 |
|
250 |
|
251 |
elif tool_function_name == 'revise_instrumental_tags':
|
252 |
#detangle tool_query_args dict
|
253 |
#snippet_lyrics = tool_query_args['snippet_lyrics'] + "\n[End]"
|
254 |
+
snippet_instrumental_tags = tool_query_args.get('current_instrumental_tags', None)
|
255 |
+
user_instrumental_feedback = tool_query_args.get('user_instrumental_feedback', None)
|
256 |
+
|
257 |
+
if snippet_instrumental_tags is None or user_instrumental_feedback is None:
|
258 |
+
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': 'Arguments are missing. Please clarify your feedback on the instrumental. Note that you cannot revise the genre if you haven\'t generated a snippet.'}
|
259 |
+
audio_message = {'role': 'assistant', 'content': 'It seems like some information is missing. Could you please provide your feedback on the instrumental? Note that you cannot revise the genre if you haven\'t generated a snippet.'}
|
260 |
+
new_messages = messages + [tool_message, audio_message]
|
261 |
+
new_history = messages_to_history(new_messages)
|
262 |
+
yield '', new_history, new_messages, '', '', '', None, None, generated_audios, []
|
263 |
+
return
|
264 |
# if 'snippet_clip_to_continue_from' not in tool_query_args:
|
265 |
# tool_query_args['snippet_clip_to_continue_from'] = None
|
266 |
# snippet_clip_to_continue_from = tool_query_args['snippet_clip_to_continue_from']
|
267 |
|
268 |
new_instrumental_tags = songwriterAssistant.revise_instrumental_tags(snippet_instrumental_tags, user_instrumental_feedback)
|
269 |
|
270 |
if isinstance(tool_query_args['sections_written'], str):
|
271 |
current_lyrics = tool_query_args['sections_written']
|
|
|
274 |
else:
|
275 |
current_lyrics = ""
|
276 |
|
277 |
+
import re
|
278 |
+
sections_list = re.findall(r'\[.*?\]', current_lyrics)
|
279 |
+
|
280 |
#current_lyrics = "\n".join(tool_query_args['sections_written'])
|
281 |
song_link = make_song(current_lyrics, new_instrumental_tags)
|
282 |
## filter out suno link from tool query arg
|
283 |
+
while "https://audiopipe.suno.ai/?item_id=" not in song_link:
|
284 |
+
print("BUGGED OUT, trying again...")
|
285 |
+
time.sleep(5)
|
286 |
+
song_link = make_song(current_lyrics, new_instrumental_tags)
|
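# Hedged note: this retry loop assumes make_song eventually returns an audiopipe URL;
# if the backend keeps failing it spins forever. A bounded retry (e.g. for _ in range(5))
# would be a safer sketch of the same idea.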
287 |
+
|
288 |
clip_id = song_link.split("https://audiopipe.suno.ai/?item_id=")[1]
|
289 |
|
290 |
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': f'new instrumental tags: {new_instrumental_tags}, clip id: {clip_id}'}
|
291 |
+
audio_message = {'role': 'assistant', 'content': f'Sure! I\'ve revised the instrumental tags: {new_instrumental_tags}\nCurrent lyrics: {current_lyrics}\n\n <audio controls autoplay><source src="{song_link}" type="audio/mp3"></audio>'}
|
292 |
audio_message['content'] += f'\n\nWhat do you think?'
|
293 |
new_messages = messages + [tool_message, audio_message]
|
294 |
new_history = messages_to_history(new_messages)
|
295 |
|
296 |
+
# current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio
|
297 |
+
if len(sections_list) > 0:
|
298 |
+
section_name = f"Up to {sections_list[-1]}"
|
299 |
+
else:
|
300 |
+
section_name = "Up to latest section"
|
301 |
+
section_name = determine_title(section_name, generated_audios)
|
302 |
|
303 |
+
generated_audios.append((song_link, current_lyrics, new_instrumental_tags, section_name, "streaming"))
|
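# New snippets start in the "streaming" state; update_song_links later swaps the
# audiopipe URL for the final audio_url once Suno reports the clip as complete.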
304 |
+
generated_audios = update_song_links(generated_audios)
|
305 |
+
clips_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=[x[3] for x in generated_audios]+[""], interactive=True)
|
306 |
+
|
307 |
+
buttons = ["return to original instrumental", "re-revise genre", "revise lyrics", "merge snippets", "continue to next section"]
|
308 |
+
|
309 |
+
yield '', new_history, new_messages, ', '.join(sections_list), current_lyrics, new_instrumental_tags, clips_to_continue, f'<audio controls><source src="{song_link}" type="audio/mp3"></audio>', generated_audios, buttons
|
310 |
+
elif tool_function_name == 'revise_section_lyrics_and_instrumental':
|
311 |
+
snippet_instrumental_tags = tool_query_args.pop('current_instrumental_tags', None)
|
312 |
+
user_instrumental_feedback = tool_query_args.pop('user_instrumental_feedback', None)
|
313 |
+
snippet_clip_to_continue_from = tool_query_args.pop('snippet_clip_to_continue_from', None)
|
314 |
+
|
315 |
+
# Revise section lyrics
|
316 |
+
revised_lyrics = songwriterAssistant.revise_section_lyrics(**tool_query_args)
|
317 |
+
|
318 |
+
# Revise instrumental tags
|
319 |
+
|
320 |
+
new_instrumental_tags = songwriterAssistant.revise_instrumental_tags(snippet_instrumental_tags, user_instrumental_feedback)
|
321 |
+
|
322 |
+
song_link = make_song(revised_lyrics, new_instrumental_tags, snippet_clip_to_continue_from)
|
323 |
+
while "https://audiopipe.suno.ai/?item_id=" not in song_link:
|
324 |
+
print("BUGGED OUT, trying again...")
|
325 |
+
time.sleep(5)
|
326 |
+
song_link = make_song(revised_lyrics, new_instrumental_tags, snippet_clip_to_continue_from)
|
327 |
+
|
328 |
+
clip_id = song_link.split("https://audiopipe.suno.ai/?item_id=")[1]
|
329 |
+
|
330 |
+
tool_message_instrumental = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': f'revised lyrics: {revised_lyrics}\nrevised instrumental tags: {new_instrumental_tags}, clip id: {clip_id}'}
|
331 |
+
audio_message = {'role': 'assistant', 'content': f'Sure! I\'ve revised the lyrics and instrumental tags.\nRevised lyrics: {revised_lyrics}\nRevised instrumental tags: {new_instrumental_tags}\n\n <audio controls autoplay><source src="{song_link}" type="audio/mp3"></audio>'}
|
332 |
+
audio_message['content'] += f'\n\nWhat do you think?'
|
333 |
+
|
334 |
+
new_messages = messages + [tool_message_instrumental, audio_message]
|
335 |
new_history = messages_to_history(new_messages)
|
336 |
+
|
337 |
+
generated_audios.append((song_link, revised_lyrics, new_instrumental_tags, tool_query_args["section_name"], "streaming"))
|
338 |
+
generated_audios = update_song_links(generated_audios)
|
339 |
+
clips_to_continue = gr.Dropdown(label='Clip to continue', value = snippet_clip_to_continue_from, choices=[x[3] for x in generated_audios]+[""], interactive=True)
|
340 |
+
|
341 |
+
buttons = ["return to original instrumental", "re-revise genre", "revise lyrics", "merge snippets", "continue to next section"]
|
342 |
+
|
343 |
+
yield '', new_history, new_messages, tool_query_args["section_name"], revised_lyrics, new_instrumental_tags, clips_to_continue, f'<audio controls><source src="{song_link}" type="audio/mp3"></audio>', generated_audios, buttons
|
344 |
+
|
345 |
+
elif tool_function_name == 'merge_all_snippets':
|
346 |
+
updated_clip_url, updated_lyrics, updated_tags, clips_list = concat_snippets(tool_query_args['last_snippet_id'])
|
347 |
+
|
348 |
+
if "still streaming" in updated_clip_url:
|
349 |
+
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': f'still streaming, try again later'}
|
350 |
+
audio_message = {'role': 'assistant', 'content': f'Unfortunately the generated clip audio is still being streamed, so you can merge later when it is fully generated.'}
|
351 |
+
|
352 |
+
new_messages = messages + [tool_message, audio_message]
|
353 |
+
new_history = messages_to_history(new_messages)
|
354 |
+
|
355 |
+
clips_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=[x[3] for x in generated_audios]+[""], interactive=True)
|
356 |
+
|
357 |
+
|
358 |
+
yield '', new_history, new_messages, "", "", "", clips_to_continue, None, generated_audios, ["merge snippets", "continue to next section"]
|
359 |
+
|
360 |
+
else:
|
361 |
+
updated_clip_id = updated_clip_url.split("https://audiopipe.suno.ai/?item_id=")[1]
|
362 |
+
|
363 |
+
#pass this info in new tool and assistant message
|
364 |
+
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': f'updated clip id: {updated_clip_id}\nupdated lyrics: {updated_lyrics}\nupdated clips path: {clips_list}'}
|
365 |
+
audio_message = {'role': 'assistant', 'content': f'Sure! All the clips are now merged. <p>updated lyrics: {updated_lyrics}</p><audio controls autoplay><source src="{updated_clip_url}" type="audio/mp3"></audio><p>updated clips path: {clips_list}</p>'}
|
366 |
+
|
367 |
+
sections_list = [line for line in updated_lyrics.split('\n') if line.startswith('[') and line.endswith(']')]  # use the merged lyrics; current_lyrics may be undefined in this branch
|
368 |
+
|
369 |
+
|
370 |
+
new_messages = messages + [tool_message, audio_message]
|
371 |
+
new_history = messages_to_history(new_messages)
|
372 |
+
|
373 |
+
if len(sections_list) > 0:
|
374 |
+
section_name = "Merge up to " + sections_list[-1]
|
375 |
+
else:
|
376 |
+
section_name = "Merge up to latest section"
|
377 |
+
section_name = determine_title(section_name, generated_audios)
|
378 |
+
|
379 |
+
# current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio
|
380 |
+
generated_audios.append((updated_clip_url, updated_lyrics, updated_tags, section_name, "streaming"))
|
381 |
+
|
382 |
+
generated_audios = update_song_links(generated_audios)
|
383 |
+
clips_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=[x[3] for x in generated_audios]+[""], interactive=True)
|
384 |
+
|
385 |
+
|
386 |
+
yield '', new_history, new_messages, section_name, updated_lyrics, updated_tags, clips_to_continue, f'<audio controls><source src="{updated_clip_url}" type="audio/mp3"></audio>', generated_audios, []
|
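# merge_all_snippets defers to suno.concat_snippets, which (per the suno.py diff below)
# now also returns the merged clip's instrumental tags so the UI state stays in sync.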
387 |
elif tool_function_name == 'finish_full_song':
|
388 |
## args are sections_to_be_written, relevant_ideas, last_snippet_id, snippet_instrumental_tags
|
389 |
|
|
|
401 |
## STEP 1: WRITE ALL LYRICS using songwriterAssistant
|
402 |
remaining_lyrics = songwriterAssistant.write_all_lyrics(**tool_query_args)
|
403 |
full_lyrics = current_lyrics + remaining_lyrics + "\n[End]"
|
404 |
+
|
405 |
+
|
406 |
+
# current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio
|
407 |
+
yield '', history, messages, "Full Song", full_lyrics, snippet_instrumental_tags, snippet_clip_to_continue_from, None, generated_audios, []
|
408 |
|
409 |
## STEP 2: MAKE SONG FOR REMAINING LYRICS
|
410 |
song_link = make_song(remaining_lyrics, snippet_instrumental_tags, snippet_clip_to_continue_from)
|
|
|
415 |
|
416 |
new_messages = messages + [tool_message, audio_message]
|
417 |
new_history = messages_to_history(new_messages)
|
418 |
+
generated_audios.append((song_link, remaining_lyrics, snippet_instrumental_tags, "Rest of Song", "streaming"))
|
419 |
+
|
420 |
+
yield '', new_history, new_messages, "Rest of Song", full_lyrics, snippet_instrumental_tags, snippet_clip_to_continue_from, song_link, generated_audios, []
|
421 |
|
422 |
## STEP 3: MERGE FULL SONG
|
423 |
if snippet_clip_to_continue_from not in [None, ""]:
|
424 |
+
updated_clip_url = "still streaming"
|
425 |
+
while "still streaming" in updated_clip_url:
|
426 |
+
updated_clip_url, updated_lyrics, updated_tags, clips_list = concat_snippets(song_link.split("https://audiopipe.suno.ai/?item_id=")[1])
|
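# Hedged note: this poll re-calls concat_snippets until the clip leaves the "streaming"
# state; a short time.sleep between attempts would avoid hammering the local API.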
427 |
else:
|
428 |
updated_clip_url, updated_lyrics, updated_tags, clips_list = song_link, remaining_lyrics, snippet_instrumental_tags, []  # keep arity in sync with the concat_snippets branch above
|
429 |
## YIELD UPDATED CLIP URL, LYRICS, AND CLIPS LIST
|
|
|
431 |
|
432 |
#tool and assistant message
|
433 |
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': f'updated clip id: {updated_clip_id}\nupdated lyrics: {updated_lyrics}\nupdated clips path: {clips_list}'}
|
434 |
+
audio_message = {'role': 'assistant', 'content': f'All done! Thank you for participating :) \nFinal Lyrics: {full_lyrics} \nFinal song: <audio controls autoplay><source src="{updated_clip_url}" type="audio/mp3"></audio>'}
|
435 |
|
436 |
new_messages = messages + [tool_message, audio_message]
|
437 |
new_history = messages_to_history(new_messages)
|
438 |
+
generated_audios.append((updated_clip_url, updated_lyrics, updated_tags, "Full Song", "streaming"))
|
439 |
+
generated_audios = update_song_links(generated_audios)
|
440 |
+
clips_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=[x[3] for x in generated_audios]+[""], interactive=True)
|
441 |
+
|
442 |
+
yield '', new_history, new_messages, "Full Song", full_lyrics, snippet_instrumental_tags, clips_to_continue, f'<audio controls><source src="{song_link}" type="audio/mp3"></audio>', generated_audios, []
|
443 |
|
444 |
elif tool_function_name == 'get_audio_snippet':
|
445 |
#detangle tool_query_args dict
|
446 |
snippet_lyrics = tool_query_args['snippet_lyrics'] + "\n[End]"
|
447 |
snippet_instrumental_tags = tool_query_args['snippet_instrumental_tags']
|
448 |
+
|
449 |
+
snippet_clip_to_continue_from = tool_query_args.get('snippet_clip_to_continue_from', None)
|
|
|
450 |
song_link = make_song(snippet_lyrics, snippet_instrumental_tags, snippet_clip_to_continue_from)
|
451 |
+
|
452 |
+
|
453 |
+
if "still streaming" in song_link:
|
454 |
+
tool_message = {
|
455 |
+
'role': 'tool',
|
456 |
+
'tool_call_id': tool_call_id,
|
457 |
+
'name': tool_function_name,
|
458 |
+
'content': 'The snippet to extend is still streaming. Please try generating this audio snippet in a little bit.'
|
459 |
+
}
|
460 |
+
|
461 |
+
new_messages = messages + [tool_message]
|
462 |
+
|
463 |
+
model_response_with_function_call = oai_client.chat.completions.create(
|
464 |
+
model="gpt-4o",
|
465 |
+
messages=new_messages,
|
466 |
+
) # get a new response from the model where it can see the function response
|
467 |
+
current_response = model_response_with_function_call.choices[0].message.content
|
468 |
+
|
469 |
+
role = "assistant"
|
470 |
+
new_messages = new_messages + [{'role': role, 'content': current_response}]
|
471 |
+
new_history = messages_to_history(new_messages)
|
472 |
+
|
473 |
+
generated_audios = update_song_links(generated_audios)
|
474 |
+
clips_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=[x[3] for x in generated_audios]+[""], interactive=True)
|
475 |
+
buttons = ["generate audio snippet", "revise lyrics", "revise genre", "merge snippets", "continue to next section"]
|
476 |
+
|
477 |
+
yield '', new_history, new_messages, snippet_lyrics.split("\n")[0], snippet_lyrics, snippet_instrumental_tags, clips_to_continue, None, generated_audios, buttons
|
478 |
+
|
479 |
+
return
|
480 |
+
print("MAKE SONG IS DONE")
|
481 |
## filter out suno link from tool query arg
|
482 |
clip_id = song_link.split("https://audiopipe.suno.ai/?item_id=")[1]
|
483 |
|
484 |
+
tool_message = {'role': 'tool', 'tool_call_id': tool_call_id, 'name': tool_function_name, 'content': f'snippet lyrics: {snippet_lyrics}\ninstrumental tags: {tool_query_args["snippet_instrumental_tags"]}, clip id: {clip_id}'}
|
485 |
+
audio_message_content = "Here's what I've come up with:\n" + snippet_lyrics + '\n\n' + f'<audio controls autoplay><source src="{song_link}" type="audio/mp3"></audio><p>instrumental tags: {tool_query_args["snippet_instrumental_tags"]}</p>'
|
486 |
audio_message_content += f'<p>continued from clip: {snippet_clip_to_continue_from}</p>'
|
487 |
+
audio_message_content += "What do you think?"
|
488 |
audio_message = {'role': 'assistant', 'content': audio_message_content}
|
489 |
+
|
490 |
+
|
491 |
+
section_name = snippet_lyrics.split("\n")[0].strip('[]* ')
|
492 |
+
section_name = determine_title(section_name, generated_audios)
|
493 |
+
|
494 |
+
#audio_message = {'role': 'assistant', 'content': gr.Audio(value=song_link, label=section_name, interactive=False, show_label=False, waveform_options={"show_controls": False})}
|
495 |
new_messages = messages + [tool_message, audio_message]
|
496 |
new_history = messages_to_history(new_messages)
|
497 |
+
print("AUDIO MESSAGE DONE")
|
498 |
+
generated_audios.append((song_link, snippet_lyrics, snippet_instrumental_tags, section_name, "streaming"))
|
499 |
+
|
500 |
+
generated_audios = update_song_links(generated_audios)
|
501 |
+
|
502 |
+
buttons = ["revise lyrics", "revise genre", "merge snippets", "continue to next section"]
|
503 |
+
clips_to_continue = gr.Dropdown(label='Clip to continue', value = snippet_clip_to_continue_from, choices=[x[3] for x in generated_audios]+[""], interactive=True)
|
504 |
+
|
505 |
+
yield '', new_history, new_messages, snippet_lyrics.split("\n")[0], snippet_lyrics, snippet_instrumental_tags, clips_to_continue, f'<audio controls><source src="{song_link}" type="audio/mp3"></audio>', generated_audios, buttons
|
506 |
+
|
507 |
+
|
508 |
else:
|
509 |
print(f"Error: function {tool_function_name} does not exist")
|
|
|
510 |
|
511 |
else:
|
512 |
# Model did not identify a function to call, result can be returned to the user
|
data/conversation_history.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"role": "system", "content": "You are an expert at writing songs. You are with an everyday person, and you will write the lyrics of the song based on this person's life by asking questions about a story of theirs. Design your questions using ask_question to help you understand the user's story, so you can write a song about the user's experience that resonates with them. We have equipped you with a set of tools to help you write this story; please use them. You are very good at making the user feel comfortable, understood, and ready to share their feelings and story. Occasionally (every 2 messages or so) you will suggest some lyrics, one section at a time, and see what the user thinks of them. Do not suggest or ask for thoughts on more than one section at a time. Be concise and youthful."}, {"role": "user", "content": "The user has stated the following:\n I have a story that could make this concept work well. The title is Future Dreams , it's about A song about envisioning your ideal future where dreams come true, adventures unfold, and loved ones stand by your side. with a genre Pop and I think this should be the structure: \nVerse 1: 4 measures\nChorus 1: 8 measures\nVerse 2: 8 measures\nChorus 2: 8 measures\nVerse 3: 8 measures\nChorus 3: 8 measures\n Introduce yourself and kick-off the songwriting process with a question."}, {"role": "assistant", "content": "Hey there! I'm excited to help you bring \"Future Dreams\" to life. It sounds like an incredible concept. \n\nTo get started, can you tell me about a specific dream or adventure you've always envisioned for your future? This could be something big, like traveling the world, or something personal, like achieving a lifelong goal."}, {"role": "user", "content": "i've envisioned living in spain after 30"}, {"role": "assistant", "content": null, "tool_calls": [
|
gpt_calls.py
CHANGED
@@ -31,6 +31,33 @@ class AI_Songwriter:
|
|
31 |
{}"""
|
32 |
|
33 |
|
|
|
34 |
def write_section(self, section_name, section_description, relevant_ideas, section_length, sections_written=None, overall_song_description=None):
|
35 |
instruction = f"Write a {section_name} of length {section_length} that that incorporates the following ideas"
|
36 |
if sections_written is not None:
|
@@ -70,14 +97,20 @@ class AI_Songwriter:
|
|
70 |
messages=convo,
|
71 |
)
|
72 |
|
73 |
-
return "Pass this back to the user: \n" + response.choices[0].message.content
|
74 |
|
75 |
def revise_section_lyrics(self, section_name, current_section, lines_to_revise, relevant_ideas=None, relevant_words=None):
|
76 |
lines_to_infill = ", ".join([str(x) for x in lines_to_revise])
|
77 |
|
78 |
full_incomplete_verse = current_section.strip("\n ").split("\n")
|
79 |
for line_num in lines_to_revise:
|
80 |
-
full_incomplete_verse[line_num-1] = '___'
|
81 |
|
82 |
line_phrase = "lines" if len(lines_to_infill) > 1 else "line"
|
83 |
line_phrase = str(len(lines_to_infill)) + " " + line_phrase
|
@@ -173,37 +206,4 @@ class AI_Songwriter:
|
|
173 |
)
|
174 |
|
175 |
return response.choices[0].message.content
|
176 |
-
|
177 |
-
# def get_relevant_ideas(self, section_name, section_description, conversation_history):
|
178 |
-
# instruction = f"Identify the relevant ideas from the conversation history that can be used in the {section_name} given its description. Output your ideas as a bullet separated list (ie - idea 1, - idea 2) such that each idea is in the format 'I ...', 'I ...', etc."
|
179 |
-
|
180 |
-
# input = f"""Section Description: {section_description}\nConversation History:{conversation_history}\nRelevant ideas:"""
|
181 |
-
|
182 |
-
# prompt = self.alpaca_prompt.format(instruction, input, "")
|
183 |
-
|
184 |
-
# convo = [
|
185 |
-
# {
|
186 |
-
# "role": "user",
|
187 |
-
# "content": prompt,
|
188 |
-
# },
|
189 |
-
# ]
|
190 |
-
# response = self.oai_client.chat.completions.create(
|
191 |
-
# model="gpt-4o",
|
192 |
-
# messages=convo,
|
193 |
-
# )
|
194 |
-
|
195 |
-
# return response.choices[0].message.content
|
196 |
-
|
197 |
-
# def get_audio_snippet(self, snippet_lyrics, snippet_instrumental_tags, snippet_clip_to_continue):
|
198 |
-
# # add a message of user asking for audio snippet
|
199 |
-
# song_link = make_song(genre_input, lyrics, new_tags, last_clip)
|
200 |
-
# # # Add the audio to the message and history
|
201 |
-
|
202 |
-
# # audio_message = {'role': 'assistant', 'content': f'<audio controls autoplay><source src="{song_link}" type="audio/mp3"></audio>'}
|
203 |
-
# # new_messages = messages + [snippet_request, audio_message]
|
204 |
-
# # new_history = messages_to_history(new_messages)
|
205 |
-
|
206 |
-
# # return new_history, new_messages
|
207 |
-
|
208 |
-
# pass
|
209 |
|
|
|
31 |
{}"""
|
32 |
|
33 |
|
34 |
+
|
35 |
+
def ask_question(self, messages):
|
36 |
+
convo = messages[:-1]
|
37 |
+
|
38 |
+
instruction = "Based on this conversation history, respond to the user acknowledging their most recent response and ask a concise question to further learn more about the user's story."
|
39 |
+
|
40 |
+
## iterate thru messages and format them into a single string where each message is separated by a newline (ie Assistant: ...\n User: ...\n)
|
41 |
+
convo_str = ""
|
42 |
+
for message in convo:
|
43 |
+
convo_str += f"{message['role']}: {message['content']}\n"
|
44 |
+
convo_str += "Assistant:"
|
45 |
+
|
46 |
+
input = f"{instruction}\nConversation History:\n{convo_str}"
|
47 |
+
|
48 |
+
response = self.oai_client.chat.completions.create(
|
49 |
+
model="gpt-4o",
|
50 |
+
messages=[
|
51 |
+
{
|
52 |
+
"role": "user",
|
53 |
+
"content": input
|
54 |
+
}
|
55 |
+
],
|
56 |
+
)
|
57 |
+
|
58 |
+
return response.choices[0].message.content
|
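# ask_question flattens the conversation into a "role: content" transcript and asks
# gpt-4o for a single follow-up question; messages[:-1] appears to drop the pending
# tool-call message so the model responds to the user's last real turn.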
59 |
+
|
60 |
+
|
61 |
def write_section(self, section_name, section_description, relevant_ideas, section_length, sections_written=None, overall_song_description=None):
|
62 |
instruction = f"Write a {section_name} of length {section_length} that that incorporates the following ideas"
|
63 |
if sections_written is not None:
|
|
|
97 |
messages=convo,
|
98 |
)
|
99 |
|
100 |
+
return "Pass this back to the user and ask if they would like to receive an audio snippet or make any revisions before moving to the next section: \n" + response.choices[0].message.content
|
101 |
|
102 |
def revise_section_lyrics(self, section_name, current_section, lines_to_revise, relevant_ideas=None, relevant_words=None):
|
103 |
lines_to_infill = ", ".join([str(x) for x in lines_to_revise])
|
104 |
|
105 |
full_incomplete_verse = current_section.strip("\n ").split("\n")
|
106 |
+
|
107 |
+
max_line_num = max(lines_to_revise)
|
108 |
+
if max_line_num > len(full_incomplete_verse):
|
109 |
+
full_incomplete_verse.extend([''] * (max_line_num - len(full_incomplete_verse)))
|
110 |
+
|
111 |
for line_num in lines_to_revise:
|
112 |
+
if line_num <= len(full_incomplete_verse):
|
113 |
+
full_incomplete_verse[line_num-1] = '___'
|
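# Worked example of the masking above: for a 4-line section and lines_to_revise=[2, 4],
# full_incomplete_verse becomes ["line 1", "___", "line 3", "___"], and the model is
# asked to infill only the ___ slots while the other lines stay fixed.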
114 |
|
115 |
line_phrase = "lines" if len(lines_to_infill) > 1 else "line"
|
116 |
line_phrase = str(len(lines_to_infill)) + " " + line_phrase
|
|
|
206 |
)
|
207 |
|
208 |
return response.choices[0].message.content
|
|
|
|
209 |
|
suno.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import requests
|
2 |
import time
|
3 |
import os
|
|
|
|
|
4 |
|
5 |
base_url = "http://127.0.0.1:8000"
|
6 |
api_endpoint_submit = f"{base_url}/generate/"
|
@@ -52,6 +54,25 @@ def generate_song(tags, prompt, save_path, clip_id=None, continue_at=30):
|
|
52 |
data["continue_at"] = continue_at
|
53 |
else:
|
54 |
data["continue_at"] = 30
|
|
|
55 |
|
56 |
response = requests.post(api_endpoint_submit, json=data) #,headers=headers)
|
57 |
response_data = response.json()
|
@@ -107,7 +128,6 @@ def generate_song(tags, prompt, save_path, clip_id=None, continue_at=30):
|
|
107 |
|
108 |
return url
|
109 |
|
110 |
-
|
111 |
def concat_snippets(clip_id):
|
112 |
concat_url = f"{api_endpoint_concat}?clip_id={clip_id}"
|
113 |
feed_url = api_endpoint_info + clip_id
|
@@ -119,7 +139,10 @@ def concat_snippets(clip_id):
|
|
119 |
print("No data in response, retrying", response_data)
|
120 |
time.sleep(2)
|
121 |
continue
|
122 |
-
|
|
|
123 |
break
|
124 |
else:
|
125 |
time.sleep(8)
|
@@ -137,6 +160,7 @@ def concat_snippets(clip_id):
|
|
137 |
return url, lyrics, concatenated_clips
|
138 |
|
139 |
lyrics = response_data["metadata"]["prompt"]
|
|
|
140 |
concatenated_clips = [x["id"] for x in response_data["metadata"]["concat_history"]]
|
141 |
song_id = response_data["id"]
|
142 |
|
@@ -144,12 +168,13 @@ def concat_snippets(clip_id):
|
|
144 |
while True:
|
145 |
response = requests.get(api_endpoint_info + song_id, headers=headers)
|
146 |
response_data = response.json()
|
|
|
147 |
if response.status_code != 200:
|
148 |
print("No data in response, retrying", response_data)
|
149 |
time.sleep(2)
|
150 |
continue
|
151 |
# print("Got response", response_data)
|
152 |
-
if response_data[0]["status"] == 'streaming':
|
153 |
break
|
154 |
else:
|
155 |
time.sleep(2)
|
@@ -160,6 +185,34 @@ def concat_snippets(clip_id):
|
|
160 |
print("Got song", response_data[0]["audio_url"])
|
161 |
url = response_data[0]["audio_url"]
|
162 |
|
163 |
-
return url, lyrics, concatenated_clips
|
|
|
164 |
|
165 |
|
|
|
1 |
import requests
|
2 |
import time
|
3 |
import os
|
4 |
+
import asyncio
|
5 |
+
import httpx
|
6 |
|
7 |
base_url = "http://127.0.0.1:8000"
|
8 |
api_endpoint_submit = f"{base_url}/generate/"
|
|
|
54 |
data["continue_at"] = continue_at
|
55 |
else:
|
56 |
data["continue_at"] = 30
|
57 |
+
|
58 |
+
feed_url = api_endpoint_info + clip_id
|
59 |
+
response = requests.get(feed_url, headers=headers)
|
60 |
+
response_data = response.json()
|
61 |
+
while True:
|
62 |
+
if response.status_code != 200:
|
63 |
+
print("No data in response, retrying", response_data)
|
64 |
+
time.sleep(2)
|
65 |
+
continue
|
66 |
+
elif response_data[0]["status"] == 'streaming':
|
67 |
+
return "Snippet to extend is still streaming, please wait to request later."
|
68 |
+
if response_data[0]["status"] == 'complete':
|
69 |
+
break
|
70 |
+
else:
|
71 |
+
time.sleep(8)
|
72 |
+
continue
|
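# Hedged note: this pre-check never refreshes `response` inside the loop, so a
# not-yet-complete clip can spin on stale data; re-issuing the requests.get each
# iteration (and skipping the check when clip_id is None) would make the guard robust.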
73 |
+
|
74 |
+
|
75 |
+
|
76 |
|
77 |
response = requests.post(api_endpoint_submit, json=data) #,headers=headers)
|
78 |
response_data = response.json()
|
|
|
128 |
|
129 |
return url
|
130 |
|
|
|
131 |
def concat_snippets(clip_id):
|
132 |
concat_url = f"{api_endpoint_concat}?clip_id={clip_id}"
|
133 |
feed_url = api_endpoint_info + clip_id
|
|
|
139 |
print("No data in response, retrying", response_data)
|
140 |
time.sleep(2)
|
141 |
continue
|
142 |
+
## CATCH THE CASE WHERE response_data a list of length 1 versus just a dictionary straight up
|
143 |
+
elif response_data["status"] == 'streaming':
|
144 |
+
return "Song is still streaming, please wait to request later.", None, None, []
|
145 |
+
if response_data["status"] == 'complete':
|
146 |
break
|
147 |
else:
|
148 |
time.sleep(8)
|
|
|
160 |
return url, lyrics, concatenated_clips
|
161 |
|
162 |
lyrics = response_data["metadata"]["prompt"]
|
163 |
+
tags = response_data["metadata"]["tags"]
|
164 |
concatenated_clips = [x["id"] for x in response_data["metadata"]["concat_history"]]
|
165 |
song_id = response_data["id"]
|
166 |
|
|
|
168 |
while True:
|
169 |
response = requests.get(api_endpoint_info + song_id, headers=headers)
|
170 |
response_data = response.json()
|
171 |
+
print("feed response for concatenated song", response_data)
|
172 |
if response.status_code != 200:
|
173 |
print("No data in response, retrying", response_data)
|
174 |
time.sleep(2)
|
175 |
continue
|
176 |
# print("Got response", response_data)
|
177 |
+
if response_data[0]["status"] == 'streaming' or response_data[0]["audio_url"] != "" or response_data[0]["status"] == 'complete':
|
178 |
break
|
179 |
else:
|
180 |
time.sleep(2)
|
|
|
185 |
print("Got song", response_data[0]["audio_url"])
|
186 |
url = response_data[0]["audio_url"]
|
187 |
|
188 |
+
return url, lyrics, tags, concatenated_clips
|
189 |
+
|
190 |
+
def update_song_links(generated_audios):
|
191 |
+
updated_generated_audios = generated_audios.copy()
|
192 |
+
for i, song_info in enumerate(generated_audios):
|
193 |
+
clip_path, lyrics, instrumental, title, status = song_info
|
194 |
+
if "audiopipe.suno.ai" in clip_path or status == "streaming":
|
195 |
+
clip_id = clip_path.split("?item_id=")[-1]
|
196 |
+
feed_url = api_endpoint_info + clip_id
|
197 |
+
|
198 |
+
response = requests.get(feed_url, headers=headers)
|
199 |
+
response_data = response.json()
|
200 |
+
if response.status_code != 200:
|
201 |
+
print("No data in response, retrying", response_data)
|
202 |
+
continue
|
203 |
+
elif response_data[0]["status"] == 'streaming':
|
204 |
+
print("still streaming, update later")
|
205 |
+
continue
|
206 |
+
if response_data[0]["status"] == 'complete':
|
207 |
+
updated_clip_path = response_data[0]["audio_url"]
|
208 |
+
print(updated_clip_path)
|
209 |
+
updated_generated_audios[i] = (updated_clip_path, lyrics, instrumental, title, "complete")
|
210 |
+
|
211 |
+
return updated_generated_audios
|
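# Minimal usage sketch, assuming the 5-tuple schema used in chat.py:
#   audios = [("https://audiopipe.suno.ai/?item_id=abc", "[Verse 1]...", "pop", "Verse 1", "streaming")]
#   audios = update_song_links(audios)  # entries flip to the final audio_url and "complete" once ready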
212 |
+
|
213 |
+
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
|
218 |
|
utils/song_utils.py
CHANGED
@@ -1,8 +1,15 @@
|
|
1 |
import os
|
2 |
from openai import OpenAI
|
3 |
from typing import Optional, Tuple, List, Dict
|
|
|
4 |
|
5 |
client_key = os.getenv("OPEN_AI_KEY")
|
|
|
6 |
oai_client = OpenAI(
|
7 |
api_key=client_key,
|
8 |
)
|
@@ -87,6 +94,7 @@ def messages_to_history(messages: Messages) -> Tuple[str, History]:
|
|
87 |
"""
|
88 |
assert messages[0]['role'] == 'system' and messages[1]['role'] == 'user'
|
89 |
|
|
|
90 |
# Filter out 'tool' messages and those containing 'tool_calls'
|
91 |
messages_for_parsing = [msg for msg in messages if msg['role'] != 'tool' and 'tool_calls' not in msg]
|
92 |
|
@@ -98,8 +106,8 @@ def messages_to_history(messages: Messages) -> Tuple[str, History]:
|
|
98 |
|
99 |
# Create history from user-assistant message pairs
|
100 |
history = [
|
101 |
-
(q['content'], r['content'])
|
102 |
-
for q, r in zip(messages_for_parsing[2::2], messages_for_parsing[3::2])
|
103 |
]
|
104 |
|
105 |
return history
|
@@ -120,8 +128,8 @@ def get_starting_messages(song_lengths: str, song_title: str, song_blurb: str, s
|
|
120 |
"""
|
121 |
system_prompt = (
|
122 |
"You are an expert at writing songs. You are with an everyday person, and you will write the lyrics of the song "
|
123 |
-
"based on this person's life by asking questions about a story of theirs. Design your questions
|
124 |
-
"
|
125 |
"resonates with them. We have equipped you with a set of tools to help you write this story; please use them. You are "
|
126 |
"very good at making the user feel comfortable, understood, and ready to share their feelings and story. Occasionally "
|
127 |
"(every 2 messages or so) you will suggest some lyrics, one section at a time, and see what the user thinks of them. "
|
@@ -145,7 +153,11 @@ def get_starting_messages(song_lengths: str, song_title: str, song_blurb: str, s
|
|
145 |
|
146 |
first_message = first_msg_res.choices[0].message.content
|
147 |
starting_messages = initial_messages + [{'role': 'assistant', 'content': first_message}]
|
148 |
-
return starting_messages
|
149 |
|
150 |
def update_song_details(instrumental_output: str) -> Tuple[Optional[str], Optional[str], Optional[str]]:
|
151 |
"""
|
|
|
1 |
import os
|
2 |
from openai import OpenAI
|
3 |
from typing import Optional, Tuple, List, Dict
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from gradio import ChatMessage
|
6 |
+
import gradio as gr
|
7 |
+
|
8 |
+
# Load environment variables from .env file
|
9 |
+
load_dotenv()
|
10 |
|
11 |
client_key = os.getenv("OPEN_AI_KEY")
|
12 |
+
print(client_key)
|
13 |
oai_client = OpenAI(
|
14 |
api_key=client_key,
|
15 |
)
|
|
|
94 |
"""
|
95 |
assert messages[0]['role'] == 'system' and messages[1]['role'] == 'user'
|
96 |
|
97 |
+
|
98 |
# Filter out 'tool' messages and those containing 'tool_calls'
|
99 |
messages_for_parsing = [msg for msg in messages if msg['role'] != 'tool' and 'tool_calls' not in msg]
|
100 |
|
|
|
106 |
|
107 |
# Create history as one chat message per parsed message
|
108 |
history = [
|
109 |
+
ChatMessage(role = q['role'], content = q['content'])
|
110 |
+
for q in messages_for_parsing[2:]
|
111 |
]
|
112 |
|
113 |
return history
|
|
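# History is now a flat list of gradio ChatMessage objects (one per message from index 2
# onward) rather than (user, assistant) tuples, matching Gradio's messages-style Chatbot.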
|
128 |
"""
|
129 |
system_prompt = (
|
130 |
"You are an expert at writing songs. You are with an everyday person, and you will write the lyrics of the song "
|
131 |
+
"based on this person's life by asking questions about a story of theirs. Design your questions using ask_question "
|
132 |
+
" to help you understand the user's story, so you can write a song about the user's experience that "
|
133 |
"resonates with them. We have equipped you with a set of tools to help you write this story; please use them. You are "
|
134 |
"very good at making the user feel comfortable, understood, and ready to share their feelings and story. Occasionally "
|
135 |
"(every 2 messages or so) you will suggest some lyrics, one section at a time, and see what the user thinks of them. "
|
|
|
153 |
|
154 |
first_message = first_msg_res.choices[0].message.content
|
155 |
starting_messages = initial_messages + [{'role': 'assistant', 'content': first_message}]
|
156 |
+
|
157 |
+
history = [ChatMessage(role = x['role'], content = x['content']) for x in starting_messages]
|
158 |
+
history = history[2:]
|
159 |
+
|
160 |
+
return starting_messages, history
|
161 |
|
162 |
def update_song_details(instrumental_output: str) -> Tuple[Optional[str], Optional[str], Optional[str]]:
|
163 |
"""
|