Spaces:

awacke1
/

GPT-4o-omni-text-audio-image-video

Running

App Files Files Community

awacke1 committed on Jun 2, 2024

Commit

8039f47

verified ·

1 Parent(s): b642de5

Update app.py

Browse files

Files changed (1) hide show

app.py +294 -274

app.py CHANGED Viewed

@@ -53,6 +53,24 @@ st.set_page_config(
     }
 )
 client = OpenAI(api_key= os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
 MODEL = "gpt-4o-2024-05-13"
 if "openai_model" not in st.session_state:
@@ -94,6 +112,269 @@ def SpeechSynthesis(result):
  # 🔍Search Glossary
 # @st.cache_resource
 def search_glossary(query):
@@ -628,7 +909,9 @@ def FileSidebar():
         if next_action=='search':
                 filesearch = PromptPrefix + file_contents
                 st.markdown(filesearch)
-                search_glossary(filesearch)
         if next_action=='md':
             st.markdown(file_contents)
@@ -869,32 +1152,16 @@ def display_buttons_with_scores(num_columns_text):
                 key = f"{category}_{game}_{term}".replace(' ', '_').lower()
                 score = load_score(key)
                 if st.button(f"{game_emoji} {category}  {game} {term} {score}", key=key):
-                    newscore = update_score(key.replace('?',''))
-                    query_prefix = f"{category_emoji} {game_emoji} ** {category} - {game} - {term} - **"
-                    st.markdown("Scored " + query_prefix + ' with score ' + str(newscore) + '.')
-def get_all_query_params(key):
-    return st.query_params().get(key, [])
-def clear_query_params():
-    st.query_params()
-# My Inference API Copy
-API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud'  # Dr Llama
-# Meta's Original - Chat HF Free Version:
-#API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
-API_KEY = os.getenv('API_KEY')
-MODEL1="meta-llama/Llama-2-7b-chat-hf"
-MODEL1URL="https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
-HF_KEY = os.getenv('HF_KEY')
-headers = {
-    "Authorization": f"Bearer {HF_KEY}",
-    "Content-Type": "application/json"
-}
-key = os.getenv('OPENAI_API_KEY')
-prompt = "...."
-should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")
@@ -951,15 +1218,6 @@ def query(payload):
 def get_output(prompt):
     return query({"inputs": prompt})
-# 5. Auto name generated output files from time and content
-def generate_filename(prompt, file_type):
-    central = pytz.timezone('US/Central')
-    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
-    replaced_prompt = prompt.replace(" ", "_").replace("\n", "_")
-    safe_prompt = "".join(x for x in replaced_prompt if x.isalnum() or x == "_")[:255]  # 255 is linux max, 260 is windows max
-    #safe_prompt = "".join(x for x in replaced_prompt if x.isalnum() or x == "_")[:45]
-    return f"{safe_date_time}_{safe_prompt}.{file_type}"
 # 6. Speech transcription via OpenAI service
 def transcribe_audio(openai_key, file_path, model):
     openai.api_key = openai_key
@@ -1444,244 +1702,6 @@ if AddAFileForContext:
                     st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
-# GPT4o documentation
-# 1. Cookbook:  https://cookbook.openai.com/examples/gpt4o/introduction_to_gpt4o
-# 2. Configure your Project and Orgs to limit/allow Models:  https://platform.openai.com/settings/organization/general
-# 3. Watch your Billing!  https://platform.openai.com/settings/organization/billing/overview
-# Set API key and organization ID from environment variables
-openai.api_key = os.getenv('OPENAI_API_KEY')
-openai.organization = os.getenv('OPENAI_ORG_ID')
-client = OpenAI(api_key= os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
-# Define the model to be used
-#MODEL = "gpt-4o"
-MODEL = "gpt-4o-2024-05-13"
-def process_text(text_input):
-    if text_input:
-        st.session_state.messages.append({"role": "user", "content": text_input})
-        with st.chat_message("user"):
-            st.markdown(text_input)
-        with st.chat_message("assistant"):
-            completion = client.chat.completions.create(
-                model=MODEL,
-                messages=[
-                    {"role": m["role"], "content": m["content"]}
-                    for m in st.session_state.messages
-                ],
-                stream=False
-            )
-            return_text = completion.choices[0].message.content
-            st.write("Assistant: " + return_text)
-            filename = generate_filename(text_input, "md")
-            create_file(filename, text_input, return_text, should_save)
-            st.session_state.messages.append({"role": "assistant", "content": return_text})
-        #st.write("Assistant: " + completion.choices[0].message.content)
-def create_file(filename, prompt, response, is_image=False):
-    with open(filename, "w", encoding="utf-8") as f:
-        f.write(prompt + "\n\n" + response)
-def save_image_old2(image, filename):
-    with open(filename, "wb") as f:
-        f.write(image.getbuffer())
-# Now filename length protected for linux and windows filename lengths
-def save_image(image, filename):
-    max_filename_length = 250
-    filename_stem, extension = os.path.splitext(filename)
-    truncated_stem = filename_stem[:max_filename_length - len(extension)] if len(filename) > max_filename_length else filename_stem
-    filename = f"{truncated_stem}{extension}"
-    with open(filename, "wb") as f:
-        f.write(image.getbuffer())
-    return filename
-def extract_boldface_terms(text):
-    return re.findall(r'\*\*(.*?)\*\*', text)
-def extract_title(text):
-    boldface_terms = re.findall(r'\*\*(.*?)\*\*', text)
-    if boldface_terms:
-        title = ' '.join(boldface_terms)
-    else:
-        title = re.sub(r'[^a-zA-Z0-9_\-]', ' ', text[-200:])
-    return title[-200:]
-def process_image(image_input, user_prompt):
-    if image_input:
-        st.markdown('Processing image:  ' + image_input.name )
-        if image_input:
-            base64_image = base64.b64encode(image_input.read()).decode("utf-8")
-            response = client.chat.completions.create(
-                model=MODEL,
-                messages=[
-                    {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
-                    {"role": "user", "content": [
-                        {"type": "text", "text": user_prompt},
-                        {"type": "image_url", "image_url": {
-                            "url": f"data:image/png;base64,{base64_image}"}
-                        }
-                    ]}
-                ],
-                temperature=0.0,
-            )
-            image_response = response.choices[0].message.content
-            st.markdown(image_response)
-            # Save markdown on image AI output from gpt4o
-            filename_md = generate_filename(image_input.name + '- ' + image_response, "md")
-            # Save markdown on image AI output from gpt4o
-            filename_png = filename_md.replace('.md', '.' + image_input.name.split('.')[-1])
-            create_file(filename_md, image_response, '', True)          #create_file() # create_file()  3 required positional arguments: 'filename', 'prompt', and 'response'
-            with open(filename_md, "w", encoding="utf-8") as f:
-                f.write(image_response)
-            # Extract boldface terms from image_response then autoname save file
-            #boldface_terms = extract_boldface_terms(image_response)
-            boldface_terms = extract_title(image_response).replace(':','')
-            filename_stem, extension = os.path.splitext(image_input.name)
-            filename_img = f"{filename_stem}  {''.join(boldface_terms)}{extension}"
-            newfilename = save_image(image_input, filename_img)
-            filename_md = newfilename.replace('.png', '.md')
-            create_file(filename_md, '', image_response, True)
-            return image_response
-def create_audio_file(filename, audio_data, should_save):
-    if should_save:
-        with open(filename, "wb") as file:
-            file.write(audio_data.getvalue())
-        st.success(f"Audio file saved as {filename}")
-    else:
-        st.warning("Audio file not saved.")
-def process_audio(audio_input, text_input):
-    if audio_input:
-        transcription = client.audio.transcriptions.create(
-            model="whisper-1",
-            file=audio_input,
-        )
-        st.session_state.messages.append({"role": "user", "content": transcription.text})
-        with st.chat_message("assistant"):
-            st.markdown(transcription.text)
-            SpeechSynthesis(transcription.text)
-            filename = generate_filename(transcription.text, "wav")
-            create_audio_file(filename, audio_input, should_save)
-        #SpeechSynthesis(transcription.text)
-        filename = generate_filename(transcription.text, "md")
-        create_file(filename, transcription.text, transcription.text, should_save)
-        #st.markdown(response.choices[0].message.content)
-def process_audio_for_video(video_input):
-    if video_input:
-        try:
-            transcription = client.audio.transcriptions.create(
-                model="whisper-1",
-                file=video_input,
-            )
-            response = client.chat.completions.create(
-                model=MODEL,
-                messages=[
-                {"role": "system", "content":"""You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
-                {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription}"}],}
-                ],
-                temperature=0,
-            )
-            st.markdown(response.choices[0].message.content)
-            return response.choices[0].message.content
-        except:
-            st.write('No transcript')
-def save_video(video_file):
-    # Save the uploaded video file
-    with open(video_file.name, "wb") as f:
-        f.write(video_file.getbuffer())
-    return video_file.name
-def process_video(video_path, seconds_per_frame=2):
-    base64Frames = []
-    base_video_path, _ = os.path.splitext(video_path)
-    video = cv2.VideoCapture(video_path)
-    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-    fps = video.get(cv2.CAP_PROP_FPS)
-    frames_to_skip = int(fps * seconds_per_frame)
-    curr_frame = 0
-    # Loop through the video and extract frames at specified sampling rate
-    while curr_frame < total_frames - 1:
-        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
-        success, frame = video.read()
-        if not success:
-            break
-        _, buffer = cv2.imencode(".jpg", frame)
-        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
-        curr_frame += frames_to_skip
-    video.release()
-    # Extract audio from video
-    audio_path = f"{base_video_path}.mp3"
-    try:
-        clip = VideoFileClip(video_path)
-        clip.audio.write_audiofile(audio_path, bitrate="32k")
-        clip.audio.close()
-        clip.close()
-    except:
-        st.write('No audio track found, moving on..')
-    print(f"Extracted {len(base64Frames)} frames")
-    print(f"Extracted audio to {audio_path}")
-    return base64Frames, audio_path
-def process_audio_and_video(video_input):
-    if video_input is not None:
-        # Save the uploaded video file
-        video_path = save_video(video_input )
-        # Process the saved video
-        base64Frames, audio_path = process_video(video_path, seconds_per_frame=1)
-        # Get the transcript for the video model call
-        transcript = process_audio_for_video(video_input)
-        # Generate a summary with visual and audio
-        response = client.chat.completions.create(
-            model=MODEL,
-            messages=[
-                {"role": "system", "content": """You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown"""},
-                {"role": "user", "content": [
-                    "These are the frames from the video.",
-                    *map(lambda x: {"type": "image_url",
-                                    "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames),
-                    {"type": "text", "text": f"The audio transcription is: {transcript}"}
-                ]},
-            ],
-            temperature=0,
-        )
-        results = response.choices[0].message.content
-        st.markdown(results)
-        if transcript:
-            filename = generate_filename(transcript, "md")
-            create_file(filename, transcript, results, should_save)
 def main():

     }
 )
+# My Inference API Copy
+# Hugging Face inference endpoint settings; the active API_URL is a hard-coded endpoint address.
+API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud'  # Dr Llama
+# Meta's Original - Chat HF Free Version:
+#API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
+# Credentials are read from environment variables; os.getenv returns None when a variable is unset.
+API_KEY = os.getenv('API_KEY')
+MODEL1="meta-llama/Llama-2-7b-chat-hf"
+MODEL1URL="https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
+HF_KEY = os.getenv('HF_KEY')
+# Request headers: HF_KEY as a bearer token plus a JSON content type.
+# NOTE(review): with HF_KEY unset this becomes the literal "Bearer None" - confirm that is acceptable.
+headers = {
+    "Authorization": f"Bearer {HF_KEY}",
+    "Content-Type": "application/json"
+}
+key = os.getenv('OPENAI_API_KEY')
+prompt = "...."
+# Sidebar checkbox, checked by default. The process_* functions read this module-level flag
+# when deciding whether to write output files.
+should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")
 client = OpenAI(api_key= os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
 MODEL = "gpt-4o-2024-05-13"
 if "openai_model" not in st.session_state:
+# GPT4o documentation
+# 1. Cookbook:  https://cookbook.openai.com/examples/gpt4o/introduction_to_gpt4o
+# 2. Configure your Project and Orgs to limit/allow Models:  https://platform.openai.com/settings/organization/general
+# 3. Watch your Billing!  https://platform.openai.com/settings/organization/billing/overview
+# Set API key and organization ID from environment variables
+# (assigned on the openai module and also passed explicitly to the OpenAI client instance).
+openai.api_key = os.getenv('OPENAI_API_KEY')
+openai.organization = os.getenv('OPENAI_ORG_ID')
+# NOTE(review): an identical client / MODEL assignment already appears earlier in this file;
+# confirm whether both are needed.
+client = OpenAI(api_key= os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
+# Define the model to be used
+#MODEL = "gpt-4o"
+# Dated model identifier passed as model= to the chat completion calls below.
+MODEL = "gpt-4o-2024-05-13"
+# 5. Auto name generated output files from time and content
+def generate_filename(prompt, file_type):
+    central = pytz.timezone('US/Central')
+    safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
+    replaced_prompt = prompt.replace(" ", "_").replace("\n", "_")
+    safe_prompt = "".join(x for x in replaced_prompt if x.isalnum() or x == "_")[:240]  # 255 is linux max, 260 is windows max
+    #safe_prompt = "".join(x for x in replaced_prompt if x.isalnum() or x == "_")[:45]
+    return f"{safe_date_time}_{safe_prompt}.{file_type}"
+def process_text(text_input):
+    if text_input:
+        st.session_state.messages.append({"role": "user", "content": text_input})
+        with st.chat_message("user"):
+            st.markdown(text_input)
+        with st.chat_message("assistant"):
+            completion = client.chat.completions.create(
+                model=MODEL,
+                messages=[
+                    {"role": m["role"], "content": m["content"]}
+                    for m in st.session_state.messages
+                ],
+                stream=False
+            )
+            return_text = completion.choices[0].message.content
+            st.write("Assistant: " + return_text)
+            filename = generate_filename(text_input, "md")
+            create_file(filename, text_input, return_text, should_save)
+            st.session_state.messages.append({"role": "assistant", "content": return_text})
+        #st.write("Assistant: " + completion.choices[0].message.content)
+def create_file(filename, prompt, response, is_image=False):
+    with open(filename, "w", encoding="utf-8") as f:
+        f.write(prompt + "\n\n" + response)
+def save_image_old2(image, filename):
+    with open(filename, "wb") as f:
+        f.write(image.getbuffer())
+# Now filename length protected for linux and windows filename lengths
+def save_image(image, filename):
+    max_filename_length = 250
+    filename_stem, extension = os.path.splitext(filename)
+    truncated_stem = filename_stem[:max_filename_length - len(extension)] if len(filename) > max_filename_length else filename_stem
+    filename = f"{truncated_stem}{extension}"
+    with open(filename, "wb") as f:
+        f.write(image.getbuffer())
+    return filename
+def extract_boldface_terms(text):
+    return re.findall(r'\*\*(.*?)\*\*', text)
+def extract_title(text):
+    boldface_terms = re.findall(r'\*\*(.*?)\*\*', text)
+    if boldface_terms:
+        title = ' '.join(boldface_terms)
+    else:
+        title = re.sub(r'[^a-zA-Z0-9_\-]', ' ', text[-200:])
+    return title[-200:]
+def process_image(image_input, user_prompt):
+    if image_input:
+        st.markdown('Processing image:  ' + image_input.name )
+        if image_input:
+            base64_image = base64.b64encode(image_input.read()).decode("utf-8")
+            response = client.chat.completions.create(
+                model=MODEL,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
+                    {"role": "user", "content": [
+                        {"type": "text", "text": user_prompt},
+                        {"type": "image_url", "image_url": {
+                            "url": f"data:image/png;base64,{base64_image}"}
+                        }
+                    ]}
+                ],
+                temperature=0.0,
+            )
+            image_response = response.choices[0].message.content
+            st.markdown(image_response)
+            # Save markdown on image AI output from gpt4o
+            filename_md = generate_filename(image_input.name + '- ' + image_response, "md")
+            # Save markdown on image AI output from gpt4o
+            filename_png = filename_md.replace('.md', '.' + image_input.name.split('.')[-1])
+            create_file(filename_md, image_response, '', True)          #create_file() # create_file()  3 required positional arguments: 'filename', 'prompt', and 'response'
+            with open(filename_md, "w", encoding="utf-8") as f:
+                f.write(image_response)
+            # Extract boldface terms from image_response then autoname save file
+            #boldface_terms = extract_boldface_terms(image_response)
+            boldface_terms = extract_title(image_response).replace(':','')
+            filename_stem, extension = os.path.splitext(image_input.name)
+            filename_img = f"{filename_stem}  {''.join(boldface_terms)}{extension}"
+            newfilename = save_image(image_input, filename_img)
+            filename_md = newfilename.replace('.png', '.md')
+            create_file(filename_md, '', image_response, True)
+            return image_response
+def create_audio_file(filename, audio_data, should_save):
+    if should_save:
+        with open(filename, "wb") as file:
+            file.write(audio_data.getvalue())
+        st.success(f"Audio file saved as {filename}")
+    else:
+        st.warning("Audio file not saved.")
+def process_audio(audio_input, text_input):
+    if audio_input:
+        transcription = client.audio.transcriptions.create(
+            model="whisper-1",
+            file=audio_input,
+        )
+        st.session_state.messages.append({"role": "user", "content": transcription.text})
+        with st.chat_message("assistant"):
+            st.markdown(transcription.text)
+            SpeechSynthesis(transcription.text)
+            filename = generate_filename(transcription.text, "wav")
+            create_audio_file(filename, audio_input, should_save)
+        #SpeechSynthesis(transcription.text)
+        filename = generate_filename(transcription.text, "md")
+        create_file(filename, transcription.text, transcription.text, should_save)
+        #st.markdown(response.choices[0].message.content)
+def process_audio_for_video(video_input):
+    if video_input:
+        try:
+            transcription = client.audio.transcriptions.create(
+                model="whisper-1",
+                file=video_input,
+            )
+            response = client.chat.completions.create(
+                model=MODEL,
+                messages=[
+                {"role": "system", "content":"""You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
+                {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription}"}],}
+                ],
+                temperature=0,
+            )
+            st.markdown(response.choices[0].message.content)
+            return response.choices[0].message.content
+        except:
+            st.write('No transcript')
+def save_video(video_file):
+    # Save the uploaded video file
+    with open(video_file.name, "wb") as f:
+        f.write(video_file.getbuffer())
+    return video_file.name
+def process_video(video_path, seconds_per_frame=2):
+    base64Frames = []
+    base_video_path, _ = os.path.splitext(video_path)
+    video = cv2.VideoCapture(video_path)
+    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps = video.get(cv2.CAP_PROP_FPS)
+    frames_to_skip = int(fps * seconds_per_frame)
+    curr_frame = 0
+    # Loop through the video and extract frames at specified sampling rate
+    while curr_frame < total_frames - 1:
+        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
+        success, frame = video.read()
+        if not success:
+            break
+        _, buffer = cv2.imencode(".jpg", frame)
+        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
+        curr_frame += frames_to_skip
+    video.release()
+    # Extract audio from video
+    audio_path = f"{base_video_path}.mp3"
+    try:
+        clip = VideoFileClip(video_path)
+        clip.audio.write_audiofile(audio_path, bitrate="32k")
+        clip.audio.close()
+        clip.close()
+    except:
+        st.write('No audio track found, moving on..')
+    print(f"Extracted {len(base64Frames)} frames")
+    print(f"Extracted audio to {audio_path}")
+    return base64Frames, audio_path
+def process_audio_and_video(video_input):
+    if video_input is not None:
+        # Save the uploaded video file
+        video_path = save_video(video_input )
+        # Process the saved video
+        base64Frames, audio_path = process_video(video_path, seconds_per_frame=1)
+        # Get the transcript for the video model call
+        transcript = process_audio_for_video(video_input)
+        # Generate a summary with visual and audio
+        response = client.chat.completions.create(
+            model=MODEL,
+            messages=[
+                {"role": "system", "content": """You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown"""},
+                {"role": "user", "content": [
+                    "These are the frames from the video.",
+                    *map(lambda x: {"type": "image_url",
+                                    "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames),
+                    {"type": "text", "text": f"The audio transcription is: {transcript}"}
+                ]},
+            ],
+            temperature=0,
+        )
+        results = response.choices[0].message.content
+        st.markdown(results)
+        if transcript:
+            filename = generate_filename(transcript, "md")
+            create_file(filename, transcript, results, should_save)
  # 🔍Search Glossary
 # @st.cache_resource
 def search_glossary(query):
         if next_action=='search':
                 filesearch = PromptPrefix + file_contents
                 st.markdown(filesearch)
+                #search_glossary(filesearch)
+                process_text(filesearch)
         if next_action=='md':
             st.markdown(file_contents)
                 key = f"{category}_{game}_{term}".replace(' ', '_').lower()
                 score = load_score(key)
                 if st.button(f"{game_emoji} {category}  {game} {term} {score}", key=key):
+                    newscore = update_score(key.replace('?',''))
+                    query_prefix = f"{category_emoji} {game_emoji} ** {category} - {game} - {term} - **"
+                    st.markdown("Scored " + query_prefix + ' with score ' + str(newscore) + '.')
+def get_all_query_params(key):
+    return st.query_params().get(key, [])
+def clear_query_params():
+    st.query_params()
 def get_output(prompt):
     return query({"inputs": prompt})
 # 6. Speech transcription via OpenAI service
 def transcribe_audio(openai_key, file_path, model):
     openai.api_key = openai_key
                     st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
 def main():