Spaces:

lordvader31
/

almithal

Running

App Files Files Community

Keane Moraes committed on May 3, 2023

Commit

2abcb58

•

1 Parent(s): 2b58524

changes to q&a and mindmap variables

Browse files

Files changed (1) hide show

app.py +84 -121

app.py CHANGED Viewed

@@ -42,42 +42,8 @@ takeaways = []
 folder_name = "./tests"
 input_accepted = False
 is_completed_analysis = False
-def get_initial_message():
-    messages=[
-            {"role": "system", "content": "You are a helpful AI Tutor. Who anwers brief questions about AI."},
-            {"role": "user", "content": "I want to learn AI"},
-            {"role": "assistant", "content": "Thats awesome, what do you want to know aboout AI"}
-        ]
-    return messages
-nodes = []
-edges = []
-nodes.append( Node(id="Spiderman",
-                   label="Peter Parker",
-                   size=25,
-                   shape="circularImage",
-                   image="http://marvel-force-chart.surge.sh/marvel_force_chart_img/top_spiderman.png")
-            ) # includes **kwargs
-nodes.append( Node(id="Captain_Marvel",
-                   size=25,
-                   shape="circularImage",
-                   image="http://marvel-force-chart.surge.sh/marvel_force_chart_img/top_captainmarvel.png")
-            )
-edges.append( Edge(source="Captain_Marvel",
-                   label="friend_of",
-                   target="Spiderman",
-                   )
-            )
-config = Config(width=750,
-                height=950,
-                directed=True,
-                physics=True,
-                hierarchical=False,
-                )
 user_secret = os.getenv("OPENAI_API_KEY")
@@ -151,10 +117,6 @@ with st.sidebar:
         else:
             st.error("Please type in your youtube link or upload the PDF")
             st.experimental_rerun()
-        # Save the transcript information
-        with open(f"{folder_name}/data_transcription.json", "w") as f:
-            json.dump(data_transcription, f, indent=4)
         # Generate embeddings
         if not os.path.exists(f"{folder_name}/word_embeddings.csv"):
@@ -227,11 +189,11 @@ tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(["Introduction", "Summary", "Transc
 # =========== INTRODUCTION ===========
 with tab1:
-    st.subheader("Introduction")
     st.markdown("## How do I use this?")
     st.markdown("Do one of the following")
     st.markdown('* Type in your youtube URL that you want worked on')
     st.markdown('* Place the PDF file that you want worked on')
     st.markdown("**Once the file / url has finished saving, a 'Start Analysis' button will appear. Click on this button to begin the note generation**")
     st.warning("NOTE: This is just a demo product in alpha testing. Any and all bugs will soon be fixed")
     st.warning("After the note taking is done, you will see multiple tabs for more information")
@@ -278,88 +240,89 @@ with tab5:
         st.warning("Please wait for the analysis to finish")
 # =========== QUERY BOT ===========
-with tab6:
-    if 'generated' not in st.session_state:
-        st.session_state['generated'] = []
-    if 'past' not in st.session_state:
-        st.session_state['past'] = []
-    def get_text():
-        st.header("Ask me something about the video:")
-        input_text = st.text_input("You: ", key="prompt")
-        return input_text
-    def get_embedding_text(prompt):
-        response = openai.Embedding.create(
-            input= prompt.strip(),
-            model="text-embedding-ada-002"
-        )
-        q_embedding = response['data'][0]['embedding']
-        print("the folder name at got here 1.5 is ", folder_name)
-        df = pd.read_csv(f'{folder_name}/word_embeddings.csv', index_col=0)
-        df['embedding'] = df['embedding'].apply(eval).apply(np.array)
-        df['distances'] = distances_from_embeddings(q_embedding, df['embedding'].values, distance_metric='cosine')
-        returns = []
-        # Sort by distance with 2 hints
-        for i, row in df.sort_values('distances', ascending=True).head(4).iterrows():
-            # Else add it to the text that is being returned
-            returns.append(row["text"])
-        # Return the context
-        return "\n\n###\n\n".join(returns)
-    def generate_response(prompt):
-        one_shot_prompt = '''
-            I am YoutubeGPT, a highly intelligent question answering bot.
-            If you ask me a question that is rooted in truth, I will give you the answer.
-            Q: What is human life expectancy in the United States?
-            A: Human life expectancy in the United States is 78 years.
-            Q: '''+prompt+'''
-            A:
-        '''
-        completions = openai.Completion.create(
-            engine = "text-davinci-003",
-            prompt = one_shot_prompt,
-            max_tokens = 1024,
-            n = 1,
-            stop=["Q:"],
-            temperature=0.5,
-        )
-        message = completions.choices[0].text
-        return message
     if is_completed_analysis:
-        user_input = get_text()
-        print("user input is ", user_input)
-        print("the folder name at got here 0.5 is ", folder_name)
-    else:
-        user_input = None
-    if 'messages' not in st.session_state:
-        st.session_state['messages'] = get_initial_message()
-    if user_input:
-        print("got here 1")
-        print("the folder name at got here 1.5 is ", folder_name)
-        text_embedding = get_embedding_text(user_input)
-        print("the folder name at got here 1.5 is ", folder_name)
-        print("got here 2")
-        title = data_transcription['title']
-        string_title = "\n\n###\n\n".join(title)
-        user_input_embedding = 'Using this context: "'+string_title+'. '+text_embedding+'", answer the following question. \n'+user_input
-        print("got here 3")
-        output = generate_response(user_input_embedding)
-        st.session_state.past.append(user_input)
-        st.session_state.generated.append(output)
-    if st.session_state['generated']:
-        for i in range(len(st.session_state['generated'])-1, -1, -1):
-            message(st.session_state["generated"][i], key=str(i))
-            message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
 # st.header("What else")

 folder_name = "./tests"
 input_accepted = False
 is_completed_analysis = False
+if not os.path.exists(folder_name):
+    os.mkdir(folder_name)
 user_secret = os.getenv("OPENAI_API_KEY")
         else:
             st.error("Please type in your youtube link or upload the PDF")
             st.experimental_rerun()
         # Generate embeddings
         if not os.path.exists(f"{folder_name}/word_embeddings.csv"):
 # =========== INTRODUCTION ===========
 with tab1:
     st.markdown("## How do I use this?")
     st.markdown("Do one of the following")
     st.markdown('* Type in your youtube URL that you want worked on')
     st.markdown('* Place the PDF file that you want worked on')
+    st.markdown('* Place the audio file that you want worked on')
     st.markdown("**Once the file / url has finished saving, a 'Start Analysis' button will appear. Click on this button to begin the note generation**")
     st.warning("NOTE: This is just a demo product in alpha testing. Any and all bugs will soon be fixed")
     st.warning("After the note taking is done, you will see multiple tabs for more information")
         st.warning("Please wait for the analysis to finish")
 # =========== QUERY BOT ===========
+with tab6:
     if is_completed_analysis:
+        if 'generated' not in st.session_state:
+            st.session_state['generated'] = []
+        if 'past' not in st.session_state:
+            st.session_state['past'] = []
+        def get_text():
+            st.header("Ask me something about the video:")
+            input_text = st.text_input("You: ", key="prompt")
+            return input_text
+        def get_embedding_text(prompt):
+            response = openai.Embedding.create(
+                input= prompt.strip(),
+                model="text-embedding-ada-002"
+            )
+            q_embedding = response['data'][0]['embedding']
+            print("the folder name at got here 1.5 is ", folder_name)
+            # df = pd.read_csv(f'{folder_name}/word_embeddings.csv', index_col=0)
+            data['embedding'] = data['embedding'].apply(eval).apply(np.array)
+            data['distances'] = distances_from_embeddings(q_embedding, data['embedding'].values, distance_metric='cosine')
+            returns = []
+            # Sort by distance with 2 hints
+            for i, row in data.sort_values('distances', ascending=True).head(4).iterrows():
+                # Else add it to the text that is being returned
+                returns.append(row["text"])
+            # Return the context
+            return "\n\n###\n\n".join(returns)
+        def generate_response(prompt):
+            one_shot_prompt = '''
+                I am YoutubeGPT, a highly intelligent question answering bot.
+                If you ask me a question that is rooted in truth, I will give you the answer.
+                Q: What is human life expectancy in the United States?
+                A: Human life expectancy in the United States is 78 years.
+                Q: '''+prompt+'''
+                A:
+            '''
+            completions = openai.Completion.create(
+                engine = "text-davinci-003",
+                prompt = one_shot_prompt,
+                max_tokens = 1024,
+                n = 1,
+                stop=["Q:"],
+                temperature=0.5,
+            )
+            message = completions.choices[0].text
+            return message
+        if is_completed_analysis:
+            user_input = get_text()
+            print("user input is ", user_input)
+            print("the folder name at got here 0.5 is ", folder_name)
+        else:
+            user_input = None
+        if 'messages' not in st.session_state:
+            st.session_state['messages'] = get_initial_message()
+        if user_input:
+            print("got here 1")
+            print("the folder name at got here 1.5 is ", folder_name)
+            text_embedding = get_embedding_text(user_input)
+            print("the folder name at got here 1.5 is ", folder_name)
+            print("got here 2")
+            title = data_transcription['title']
+            string_title = "\n\n###\n\n".join(title)
+            user_input_embedding = 'Using this context: "'+string_title+'. '+text_embedding+'", answer the following question. \n'+user_input
+            print("got here 3")
+            output = generate_response(user_input_embedding)
+            st.session_state.past.append(user_input)
+            st.session_state.generated.append(output)
+        if st.session_state['generated']:
+            for i in range(len(st.session_state['generated'])-1, -1, -1):
+                message(st.session_state["generated"][i], key=str(i))
+                message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
 # st.header("What else")