msaelices commited on
Commit
8fe64f7
β€’
1 Parent(s): e59c4fa

Initial commit

Browse files
Files changed (6) hide show
  1. LICENSE +21 -0
  2. README.md +2 -13
  3. api.py +19 -0
  4. engines.py +90 -0
  5. main.py +82 -0
  6. requirements.txt +8 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Manuel Saelices
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,2 @@
1
- ---
2
- title: Note Taker
3
- emoji: 🐠
4
- colorFrom: green
5
- colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.25.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # note-taker
2
+ AI-based UX for taking notes from an audio file, with speaker identification
 
 
 
 
 
 
 
 
 
 
 
api.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from io import BytesIO
3
+
4
+ from engines import TranscriptEngine
5
+
6
+
7
def transcribe(engine: TranscriptEngine, language: str, audio_file: BytesIO) -> str:
    """Delegate transcription of *audio_file* to the chosen backend.

    ``engine`` is any object satisfying the TranscriptEngine protocol;
    ``language`` is a language code such as ``"en"``.
    """
    result = engine.transcribe(language, audio_file)
    return result
9
+
10
+
11
def summarize_transcript(
    openai_api_key: str,
    transcript: str,
    openai_model: str = 'gpt-4',
    prompt: str = 'Summarize the following audio transcription with a list of the key points with the speakers in the original language:',
) -> str:
    """Produce a summary of *transcript* via the OpenAI chat API.

    Currently a stub: the API call is not implemented yet, so a canned
    placeholder summary is returned regardless of the arguments.
    """
    # TODO: Implement this — call OpenAI with `openai_api_key`,
    # `openai_model` and `prompt` once the integration lands.
    placeholder_summary = 'This is a summary of the transcription.'
    return placeholder_summary
engines.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Protocol
2
+ from io import BytesIO
3
+
4
+ import requests
5
+
6
+ from google.cloud import speech_v2 as speech
7
+
8
+
9
class TranscriptEngine(Protocol):
    """Structural interface every transcription backend must satisfy.

    Concrete engines (AssemblyAI, GoogleCloud in this module) are chosen
    at runtime by ``get_engine`` and duck-typed against this protocol.
    """

    def transcribe(self, language: str, audio_file: BytesIO) -> str:
        """Transcribe an audio stream to text.

        ``language`` is a language code such as ``"en"``. Both concrete
        implementations in this module accept a BytesIO stream here, so
        the annotation follows them (the original said ``bytes``).
        """
        ...
15
+
16
+
17
class AssemblyAI:
    """Transcription engine backed by the AssemblyAI REST API.

    Satisfies the TranscriptEngine protocol: upload the audio, create a
    transcription job, then poll the job until it finishes.
    """

    # API endpoints, shared by all instances.
    transcript = 'https://api.assemblyai.com/v2/transcript'
    upload = 'https://api.assemblyai.com/v2/upload'

    def __init__(self, api_key: str):
        self.api_key = api_key

    def transcribe(self, language, audio_file: BytesIO) -> str:
        """Transcribe ``audio_file`` (a binary stream) to text.

        Raises RuntimeError if AssemblyAI reports the job failed.
        """
        import time  # local import: only needed for the polling back-off

        headers = {'authorization': self.api_key, 'content-type': 'application/json'}

        # 1. Upload the raw audio; AssemblyAI answers with a temporary URL.
        upload_response = requests.post(
            AssemblyAI.upload, headers=headers, data=audio_file
        )
        audio_url = upload_response.json()['upload_url']

        # 2. Create the transcription job with diarization enabled.
        json = {
            'audio_url': audio_url,
            'iab_categories': True,
            'language_code': language,
            'speaker_labels': True,
        }
        response = requests.post(AssemblyAI.transcript, json=json, headers=headers)
        if not response.ok:
            # TODO: Handle errors properly; returning the error payload is
            # kept for backward compatibility with existing callers.
            return response.json()

        # 3. Poll until completion. The previous version busy-looped with no
        # delay and spun forever when the job ended in the 'error' state.
        polling_endpoint = f'{AssemblyAI.transcript}/{response.json()["id"]}'
        status = 'submitted'
        while status != 'completed':
            polling_response = requests.get(polling_endpoint, headers=headers)
            result = polling_response.json()  # decode once per poll
            status = result['status']
            if status == 'error':
                raise RuntimeError(
                    f'AssemblyAI transcription failed: {result.get("error")}'
                )
            if status != 'completed':
                time.sleep(1)  # avoid hammering the API while the job runs

        # TODO: Return the speakers and their text
        return result['text']
57
+
58
+
59
class GoogleCloud:
    """Transcription engine backed by Google Cloud Speech-to-Text.

    Authentication comes from Application Default Credentials (a
    credentials file); ``api_key`` is accepted only for interface parity
    with the other engines and is never used.
    """

    def __init__(self, api_key: str):
        self.api_key = api_key  # unused; Google auth uses a credentials file

    def transcribe(self, language, audio_file: BytesIO) -> str:
        """Transcribe ``audio_file`` with speaker diarization enabled."""
        # BUG FIX: the module-level import aliases ``speech_v2``, but the
        # classes used here (SpeechClient, RecognitionAudio,
        # RecognitionConfig, long_running_recognize) belong to the v1 API;
        # v2 exposes a different Recognizer-based surface. Import v1
        # locally so these calls resolve.
        from google.cloud import speech_v1 as speech

        client = speech.SpeechClient()

        audio = speech.RecognitionAudio(content=audio_file.read())

        config = speech.RecognitionConfig(
            # Let the service sniff the container/encoding from the bytes.
            encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
            language_code=language,
            diarization_config=speech.SpeakerDiarizationConfig(
                enable_speaker_diarization=True,
            ),
        )

        # long_running_recognize returns an Operation; block on its result.
        operation = client.long_running_recognize(config=config, audio=audio)
        response = operation.result()

        # Join the top alternative of each result chunk into one string.
        return ' '.join(
            result.alternatives[0].transcript for result in response.results
        )
82
+
83
+
84
def get_engine(engine_type: str, api_key: str | None) -> TranscriptEngine:
    """Instantiate the transcription engine named by ``engine_type``.

    ``engine_type`` must be one of ``'AssemblyAI'`` or ``'Google'``;
    anything else raises KeyError. ``api_key`` may be None for engines
    that authenticate another way.
    """
    registry = {
        'AssemblyAI': AssemblyAI,
        'Google': GoogleCloud,
    }
    engine_class = registry[engine_type]
    return engine_class(api_key)
main.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+
4
+ from dotenv import load_dotenv
5
+ from engines import get_engine
6
+
7
+ import api
8
+
9
+ # Load environment variables from .env file before importing any other modules
10
+ load_dotenv()
11
+
12
+
13
def main():
    """Streamlit entry point: upload audio, transcribe it, summarize it."""
    st.set_page_config(
        page_title="Note Taker",
        page_icon="πŸŽ™οΈ",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    title = "πŸŽ™οΈ Meetings Note Taker πŸŽ™οΈ"
    st.title(title)
    # FIX: original text read "sgenerate" and named Assembly.AI even when
    # the Google engine is selected.
    st.write(
        "Upload an audio file, transcribe it using the selected engine, and generate meeting notes using your selected model."
    )

    # Secrets/config come from the environment when set, otherwise they
    # are collected interactively from the user.
    openai_api_key = os.environ.get("OPENAI_API_KEY") or st.text_input(
        "Enter your OpenAI API key:", type="password"
    )

    engine_type = os.environ.get("TRANSCRIPTION_ENGINE") or st.selectbox(
        "Select a transcription engine:", ["AssemblyAI", "Google"]
    )
    if engine_type in ["AssemblyAI"]:
        engine_api_key = os.environ.get(
            f"{engine_type.upper()}_API_KEY"
        ) or st.text_input(f"Enter your {engine_type} API key:", type="password")
    else:
        engine_api_key = (
            None  # Google doesn't need an API key but uses a credentials file
        )
    openai_model = os.environ.get("OPENAI_MODEL") or st.selectbox(
        "Select a model:", ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4-0613"]
    )

    uploaded_audio = st.file_uploader(
        "Upload an audio file",
        type=["aac", "m4a", "mp3", "webm", "mp4", "mpga", "wav", "mpeg"],
        accept_multiple_files=False,
    )
    language = os.environ.get("AUDIO_LANGUAGE") or st.selectbox(
        "Language code of the audio:", ["en", "es"]
    )

    if st.button("Generate Notes"):
        if uploaded_audio:
            if openai_api_key:
                st.markdown("Transcribing the audio...")
                engine = get_engine(engine_type, engine_api_key)
                transcription = api.transcribe(engine, language, uploaded_audio)

                st.markdown(
                    f"### Transcription:\n\n<details><summary>Click to view</summary><p><pre><code>{transcription}</code></pre></p></details>",
                    unsafe_allow_html=True,
                )

                st.markdown("Summarizing the transcription...")

                summary = api.summarize_transcript(
                    openai_api_key,
                    transcription,
                    openai_model,
                )

                # FIX: was an f-string with no placeholders.
                st.markdown("### Summary:")
                st.write(summary)
            else:
                st.error("We need valid OpenAI and AssemblyAI API keys")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ requests>=2.31.0
2
+ streamlit>=1.25.0
3
+ python-dotenv>=1.0.0
4
+ google_cloud_speech>=2.21.0
5
+ torch==2.0.0+cu117
6
+ torchvision==0.15.1+cu117
7
+ torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cu117
8
+ git+https://github.com/m-bain/whisperx.git