Spaces:

sohojoe
/

project_charles

Sleeping

App Files Files Community

sohojoe committed on May 25, 2023

Commit

edce499

•

1 Parent(s): 4531ec5

add basic text to speech

Browse files

Files changed (10) hide show

app.py +151 -0
requirements.txt +13 -0
sample_utils/__init__.py +0 -0
sample_utils/__pycache__/__init__.cpython-310.pyc +0 -0
sample_utils/__pycache__/__init__.cpython-39.pyc +0 -0
sample_utils/__pycache__/download.cpython-310.pyc +0 -0
sample_utils/__pycache__/turn.cpython-310.pyc +0 -0
sample_utils/__pycache__/turn.cpython-39.pyc +0 -0
sample_utils/download.py +50 -0
sample_utils/turn.py +32 -0

app.py ADDED Viewed

	@@ -0,0 +1,151 @@

+from collections import deque
+import os
+import threading
+import time
+import av
+import numpy as np
+import streamlit as st
+from streamlit_webrtc import WebRtcMode, webrtc_streamer
+import pydub
+# import av
+# import cv2
+from sample_utils.turn import get_ice_servers
+import json
+from typing import List
+from vosk import SetLogLevel, Model, KaldiRecognizer
+SetLogLevel(-1) # mutes vosk verbosity
+from dotenv import load_dotenv
+load_dotenv()
+# Settings for the "system one" speech-recognition path.
+system_one = {
+    # Despite the key name, this value is used as an audio sample/frame rate:
+    # it is passed to KaldiRecognizer and to AudioSegment.set_frame_rate.
+    "audio_bit_rate": 16000,
+    # "audio_bit_rate": 32000,
+    # "audio_bit_rate": 48000,
+}
+# Checkbox state; forwarded to webrtc_streamer as desired_playing_state.
+playing = st.checkbox("Playing", value=True)
+def load_vosk (model='small'):
+    # load vosk model
+    # get path of current file
+    current_file_path = os.path.abspath(__file__)
+    current_directory = os.path.dirname(current_file_path)
+    _path = os.path.join(current_directory, 'models', 'vosk', model)
+    model_voice = Model(_path)
+    recognizer = KaldiRecognizer(model_voice, system_one['audio_bit_rate'])
+    return recognizer
+# Module-level recognizer instance; do_work() feeds audio into it.
+vask = load_vosk()
+def handle_audio_frame(frame):
+    """Placeholder for per-frame audio handling; currently does nothing."""
+    # if self.vosk.AcceptWaveform(data):
+    pass
+def do_work(data: bytearray) -> tuple[str, bool]:
+    text = ''
+    speaker_finished = False
+    if vask.AcceptWaveform(data):
+        result = vask.Result()
+        result_json = json.loads(result)
+        text = result_json['text']
+        speaker_finished = True
+    else:
+        result = vask.PartialResult()
+        result_json = json.loads(result)
+        text = result_json['partial']
+    return text, speaker_finished
+# Guards frames_deque, which is filled by queued_audio_frames_callback and
+# drained by the main loop further down.
+frames_deque_lock = threading.Lock()
+# Received audio frames waiting to be processed.
+frames_deque: deque = deque([])
+async def queued_audio_frames_callback(
+            frames: List[av.AudioFrame],
+        ) -> av.AudioFrame:
+    with frames_deque_lock:
+        frames_deque.extend(frames)
+    # create frames to be returned.
+    new_frames = []
+    for frame in frames:
+        input_array = frame.to_ndarray()
+        new_frame = av.AudioFrame.from_ndarray(
+            np.zeros(input_array.shape, dtype=input_array.dtype),
+            layout=frame.layout.name,
+        )
+        new_frame.sample_rate = frame.sample_rate
+        new_frames.append(new_frame)
+    # TODO: replace with the audio we want to send to the other side.
+    return new_frames
+# Start the WebRTC component in send/receive mode. Incoming audio frames are
+# delivered in batches to queued_audio_frames_callback, and the "Playing"
+# checkbox drives the desired playing state.
+webrtc_ctx = webrtc_streamer(
+    key="charles",
+    desired_playing_state=playing,
+    # audio_receiver_size=4096,
+    # audio_frame_callback=process_audio,
+    queued_audio_frames_callback=queued_audio_frames_callback,
+    mode=WebRtcMode.SENDRECV,
+    # ICE servers come from sample_utils.turn.get_ice_servers().
+    rtc_configuration={"iceServers": get_ice_servers()},
+    async_processing=True,
+)
+system_one_audio_status = st.empty()
+if not webrtc_ctx.state.playing:
+    exit
+system_one_audio_status.write("Initializing...")
+system_one_audio_output = st.empty()
+system_one_audio_history = []
+system_one_audio_history_output = st.empty()
+sound_chunk = pydub.AudioSegment.empty()
+while True:
+    if webrtc_ctx.state.playing:
+        audio_frames = []
+        with frames_deque_lock:
+            while len(frames_deque) > 0:
+                frame = frames_deque.popleft()
+                audio_frames.append(frame)
+        if len(audio_frames) == 0:
+            time.sleep(0.1)
+            system_one_audio_status.write("No frame arrived.")
+            continue
+        system_one_audio_status.write("Running. Say something!")
+        for audio_frame in audio_frames:
+            sound = pydub.AudioSegment(
+                data=audio_frame.to_ndarray().tobytes(),
+                sample_width=audio_frame.format.bytes,
+                frame_rate=audio_frame.sample_rate,
+                channels=len(audio_frame.layout.channels),
+            )
+            sound = sound.set_channels(1)
+            sound = sound.set_frame_rate(system_one['audio_bit_rate'])
+            sound_chunk += sound
+        if len(sound_chunk) > 0:
+            buffer = np.array(sound_chunk.get_array_of_samples())
+            text, speaker_finished = do_work(buffer.tobytes())
+            system_one_audio_output.markdown(f"**System 1 Audio:** {text}")
+            if speaker_finished and len(text) > 0:
+                system_one_audio_history.append(text)
+                if len(system_one_audio_history) > 10:
+                    system_one_audio_history = system_one_audio_history[-10:]
+                table_content = "| System 1 Audio History |\n| --- |\n"
+                table_content += "\n".join([f"| {item} |" for item in reversed(system_one_audio_history)])
+                system_one_audio_history_output.markdown(table_content)
+            sound_chunk = pydub.AudioSegment.empty()
+    else:
+        system_one_audio_status.write("Stopped.")
+        break

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+vosk
+# scipy
+# sounddevice
+# setuptools-rust
+# git+https://github.com/openai/whisper.git
+opencv-python-headless
+pydub
+streamlit_webrtc
+twilio
+python-dotenv
+watchdog

sample_utils/__init__.py ADDED Viewed

File without changes

sample_utils/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (168 Bytes). View file

sample_utils/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (166 Bytes). View file

sample_utils/__pycache__/download.cpython-310.pyc ADDED Viewed

Binary file (1.32 kB). View file

sample_utils/__pycache__/turn.cpython-310.pyc ADDED Viewed

Binary file (1.29 kB). View file

sample_utils/__pycache__/turn.cpython-39.pyc ADDED Viewed

Binary file (1.29 kB). View file

sample_utils/download.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import urllib.request
+from pathlib import Path
+import streamlit as st
+# This code is based on https://github.com/streamlit/demo-self-driving/blob/230245391f2dda0cb464008195a470751c01770b/streamlit_app.py#L48  # noqa: E501
+def download_file(url, download_to: Path, expected_size=None):
+    # Don't download the file twice.
+    # (If possible, verify the download using the file length.)
+    if download_to.exists():
+        if expected_size:
+            if download_to.stat().st_size == expected_size:
+                return
+        else:
+            st.info(f"{url} is already downloaded.")
+            if not st.button("Download again?"):
+                return
+    download_to.parent.mkdir(parents=True, exist_ok=True)
+    # These are handles to two visual elements to animate.
+    weights_warning, progress_bar = None, None
+    try:
+        weights_warning = st.warning("Downloading %s..." % url)
+        progress_bar = st.progress(0)
+        with open(download_to, "wb") as output_file:
+            with urllib.request.urlopen(url) as response:
+                length = int(response.info()["Content-Length"])
+                counter = 0.0
+                MEGABYTES = 2.0 ** 20.0
+                while True:
+                    data = response.read(8192)
+                    if not data:
+                        break
+                    counter += len(data)
+                    output_file.write(data)
+                    # We perform animation by overwriting the elements.
+                    weights_warning.warning(
+                        "Downloading %s... (%6.2f/%6.2f MB)"
+                        % (url, counter / MEGABYTES, length / MEGABYTES)
+                    )
+                    progress_bar.progress(min(counter / length, 1.0))
+    # Finally, we remove these visual elements by calling .empty().
+    finally:
+        if weights_warning is not None:
+            weights_warning.empty()
+        if progress_bar is not None:
+            progress_bar.empty()

sample_utils/turn.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import logging
+import os
+import streamlit as st
+from twilio.rest import Client
+logger = logging.getLogger(__name__)
+@st.cache_data
+def get_ice_servers():
+    """Use Twilio's TURN server because Streamlit Community Cloud has changed
+    its infrastructure and WebRTC connection cannot be established without TURN server now.  # noqa: E501
+    We considered Open Relay Project (https://www.metered.ca/tools/openrelay/) too,
+    but it is not stable and hardly works as some people reported like https://github.com/aiortc/aiortc/issues/832#issuecomment-1482420656  # noqa: E501
+    See https://github.com/whitphx/streamlit-webrtc/issues/1213
+    """
+    # Ref: https://www.twilio.com/docs/stun-turn/api
+    try:
+        account_sid = os.environ["TWILIO_ACCOUNT_SID"]
+        auth_token = os.environ["TWILIO_AUTH_TOKEN"]
+    except KeyError:
+        logger.warning(
+            "Twilio credentials are not set. Fallback to a free STUN server from Google."  # noqa: E501
+        )
+        return [{"urls": ["stun:stun.l.google.com:19302"]}]
+    client = Client(account_sid, auth_token)
+    token = client.tokens.create()
+    return token.ice_servers