Spaces:

benjolo
/

InterpreTalk

Paused

App Files Files Community

benjolo committed on Apr 21

Commit

78289fa

•

1 Parent(s): 3443e32

Delete backend

Browse files

Files changed (45) hide show

backend/.DS_Store +0 -0
backend/.env +0 -2
backend/.gitignore +0 -2
backend/Client.py +0 -81
backend/__pycache__/Client.cpython-310.pyc +0 -0
backend/__pycache__/main.cpython-310.pyc +0 -0
backend/logging.yaml +0 -22
backend/main.py +0 -343
backend/models/Seamless/vad_s2st_sc_24khz_main.yaml +0 -25
backend/models/SeamlessStreaming/vad_s2st_sc_main.yaml +0 -21
backend/mongodb/endpoints/__pycache__/calls.cpython-310.pyc +0 -0
backend/mongodb/endpoints/__pycache__/users.cpython-310.pyc +0 -0
backend/mongodb/endpoints/calls.py +0 -96
backend/mongodb/endpoints/users.py +0 -53
backend/mongodb/models/__pycache__/calls.cpython-310.pyc +0 -0
backend/mongodb/models/__pycache__/users.cpython-310.pyc +0 -0
backend/mongodb/models/calls.py +0 -75
backend/mongodb/models/users.py +0 -44
backend/mongodb/operations/__pycache__/calls.cpython-310.pyc +0 -0
backend/mongodb/operations/__pycache__/users.cpython-310.pyc +0 -0
backend/mongodb/operations/calls.py +0 -280
backend/mongodb/operations/users.py +0 -77
backend/pcmToWav.py +0 -34
backend/preprocess_wav.py +0 -65
backend/requirements.txt +0 -28
backend/routes/__init__.py +0 -1
backend/routes/__pycache__/__init__.cpython-310.pyc +0 -0
backend/routes/__pycache__/routing.cpython-310.pyc +0 -0
backend/routes/routing.py +0 -9
backend/seamless/__init__.py +0 -0
backend/seamless/room.py +0 -64
backend/seamless/simuleval_agent_directory.py +0 -171
backend/seamless/simuleval_transcoder.py +0 -428
backend/seamless/speech_and_text_output.py +0 -15
backend/seamless/transcoder_helpers.py +0 -43
backend/seamless_utils.py +0 -210
backend/tests/__pycache__/test_client.cpython-310-pytest-8.1.1.pyc +0 -0
backend/tests/__pycache__/test_main.cpython-310-pytest-8.1.1.pyc +0 -0
backend/tests/__pycache__/test_main.cpython-310.pyc +0 -0
backend/tests/silence.wav +0 -0
backend/tests/speaking.wav +0 -0
backend/tests/test_client.py +0 -59
backend/tests/test_main.py +0 -90
backend/utils/__pycache__/text_rank.cpython-310.pyc +0 -0
backend/utils/text_rank.py +0 -60

backend/.DS_Store DELETED Viewed

Binary file (6.15 kB)

backend/.env DELETED Viewed

	@@ -1,2 +0,0 @@
1	- MONGODB_URI=mongodb+srv://<redacted-user>:<redacted-password>@<redacted-cluster-host>/?retryWrites=true&w=majority&appName=IT-Cluster1
2	- OPENAI_API_KEY=<redacted-openai-api-key>

backend/.gitignore DELETED Viewed

	@@ -1,2 +0,0 @@
1	- myenv
2	- .pytest_cache

backend/Client.py DELETED Viewed

@@ -1,81 +0,0 @@
-from typing import Tuple
-import wave
-import os
-import torchaudio
-from vad import EnergyVAD
-TARGET_SAMPLING_RATE = 16000
-def create_frames(data: bytes, frame_duration: int) -> Tuple[bytes]:
-    frame_size = int(TARGET_SAMPLING_RATE * (frame_duration / 1000))
-    return (data[i:i + frame_size] for i in range(0, len(data), frame_size)), frame_size
-def detect_activity(energies: list):
-    if sum(energies) < len(energies) / 12:
-        return False
-    count = 0
-    for energy in energies:
-        if energy == 1:
-            count += 1
-            if count == 12:
-                return True
-        else:
-            count = 0
-    return False
-class Client:
-    def __init__(self, sid, client_id, username, call_id=None, original_sr=None):
-        self.sid = sid
-        self.client_id = client_id
-        self.username = username,
-        self.call_id = call_id
-        self.buffer = bytearray()
-        self.output_path = self.sid + "_output_audio.wav"
-        self.target_language = None
-        self.original_sr = original_sr
-        self.vad = EnergyVAD(
-            sample_rate=TARGET_SAMPLING_RATE,
-            frame_length=25,
-            frame_shift=20,
-            energy_threshold=0.05,
-            pre_emphasis=0.95,
-        ) # PM - Default values given in the docs for this class
-    def add_bytes(self, new_bytes):
-        self.buffer += new_bytes
-    def resample_and_clear(self):
-        print(f"📥 [ClientAudioBuffer] Writing {len(self.buffer)} bytes to {self.output_path}")
-        with wave.open(self.sid + "_OG.wav", "wb") as wf:
-            wf.setnchannels(1)
-            wf.setsampwidth(2)
-            wf.setframerate(self.original_sr)
-            wf.setnframes(0)
-            wf.setcomptype("NONE", "not compressed")
-            wf.writeframes(self.buffer)
-        waveform, sample_rate = torchaudio.load(self.sid + "_OG.wav")
-        resampler = torchaudio.transforms.Resample(sample_rate, TARGET_SAMPLING_RATE, dtype=waveform.dtype)
-        resampled_waveform = resampler(waveform)
-        self.buffer = bytearray()
-        return resampled_waveform
-    def vad_analyse(self, resampled_waveform):
-        torchaudio.save(self.output_path, resampled_waveform, TARGET_SAMPLING_RATE)
-        vad_array = self.vad(resampled_waveform)
-        print(f"VAD OUTPUT: {vad_array}")
-        return detect_activity(vad_array)
-    def write_to_file(self, resampled_waveform):
-        torchaudio.save(self.output_path, resampled_waveform, TARGET_SAMPLING_RATE)
-    def get_length(self):
-        return len(self.buffer)
-    def __del__(self):
-        if len(self.buffer) > 0:
-            print(f"🚨 [ClientAudioBuffer] Buffer not empty for {self.sid} ({len(self.buffer)} bytes)!")
-        if os.path.exists(self.output_path):
-            os.remove(self.output_path)
-        if os.path.exists(self.sid + "_OG.wav"):
-            os.remove(self.sid + "_OG.wav")

backend/__pycache__/Client.cpython-310.pyc DELETED Viewed

Binary file (3.41 kB)

backend/__pycache__/main.cpython-310.pyc DELETED Viewed

Binary file (6.48 kB)

backend/logging.yaml DELETED Viewed

@@ -1,22 +0,0 @@
-version: 1
-disable_existing_loggers: false
-formatters:
-  standard:
-    format: "%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s"
-handlers:
-  console:
-    class: logging.StreamHandler
-    formatter: standard
-    stream: ext://sys.stdout
-loggers:
-  uvicorn:
-    error:
-      propagate: true
-root:
-  level: INFO
-  handlers: [console]
-  propagate: no

backend/main.py DELETED Viewed

@@ -1,343 +0,0 @@
-from operator import itemgetter
-import os
-from datetime import datetime
-import uvicorn
-from typing import Any, Optional, Tuple, Dict, TypedDict
-from urllib import parse
-from uuid import uuid4
-import logging
-from fastapi.logger import logger as fastapi_logger
-import sys
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi import APIRouter, Body, Request, status
-from pymongo import MongoClient
-from dotenv import dotenv_values
-from routes import router as api_router
-from contextlib import asynccontextmanager
-import requests
-from typing import List
-from datetime import date
-from mongodb.operations.calls import *
-from mongodb.operations.users import *
-from mongodb.models.calls import UserCall, UpdateCall
-# from mongodb.endpoints.calls import *
-from transformers import AutoProcessor, SeamlessM4Tv2Model
-# from seamless_communication.inference import Translator
-from Client import Client
-import numpy as np
-import torch
-import socketio
-# Configure logger
-gunicorn_error_logger = logging.getLogger("gunicorn.error")
-gunicorn_logger = logging.getLogger("gunicorn")
-uvicorn_access_logger = logging.getLogger("uvicorn.access")
-gunicorn_error_logger.propagate = True
-gunicorn_logger.propagate = True
-uvicorn_access_logger.propagate = True
-uvicorn_access_logger.handlers = gunicorn_error_logger.handlers
-fastapi_logger.handlers = gunicorn_error_logger.handlers
-# sio is the main socket.io entrypoint
-sio = socketio.AsyncServer(
-    async_mode="asgi",
-    cors_allowed_origins="*",
-    logger=gunicorn_logger,
-    engineio_logger=gunicorn_logger,
-)
-# sio.logger.setLevel(logging.DEBUG)
-socketio_app = socketio.ASGIApp(sio)
-# app.mount("/", socketio_app)
-config = dotenv_values(".env")
-# Read connection string from environment vars
-# uri = os.environ['MONGODB_URI']
-# Read connection string from .env file
-uri = config['MONGODB_URI']
-# MongoDB Connection Lifespan Events
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    # startup logic
-    app.mongodb_client = MongoClient(uri)
-    app.database = app.mongodb_client['IT-Cluster1'] #connect to interpretalk primary db
-    try:
-        app.mongodb_client.admin.command('ping')
-        print("MongoDB Connection Established...")
-    except Exception as e:
-        print(e)
-    yield
-    # shutdown logic
-    print("Closing MongoDB Connection...")
-    app.mongodb_client.close()
-app = FastAPI(lifespan=lifespan, logger=gunicorn_logger)
-# New CORS functionality
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"], # configured node app port
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-app.include_router(api_router) # include routers for user, calls and transcripts operations
-DEBUG = True
-ESCAPE_HATCH_SERVER_LOCK_RELEASE_NAME = "remove_server_lock"
-TARGET_SAMPLING_RATE = 16000
-MAX_BYTES_BUFFER = 960_000
-print("")
-print("")
-print("=" * 18 + " Interpretalk is starting... " + "=" * 18)
-###############################################
-# Configure socketio server
-###############################################
-# TODO PM - change this to the actual path
-# seamless remnant code
-CLIENT_BUILD_PATH = "../streaming-react-app/dist/"
-static_files = {
-    "/": CLIENT_BUILD_PATH,
-    "/assets/seamless-db6a2555.svg": {
-        "filename": CLIENT_BUILD_PATH + "assets/seamless-db6a2555.svg",
-        "content_type": "image/svg+xml",
-    },
-}
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
-# PM - hardcoding temporarily as my GPU doesn't have enough VRAM
-model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large").to(device)
-bytes_data = bytearray()
-model_name = "seamlessM4T_v2_large"
-vocoder_name = "vocoder_v2" if model_name == "seamlessM4T_v2_large" else "vocoder_36langs"
-clients = {}
-rooms = {}
-def get_collection_users():
-    return app.database["user_records"]
-def get_collection_calls():
-    return app.database["call_records"]
-@app.get("/home/", response_description="Welcome User")
-def test():
-    return {"message": "Welcome to InterpreTalk!"}
-async def send_translated_text(client_id, username, original_text, translated_text, room_id):
-    # print(rooms) # Debugging
-    # print(clients) # Debugging
-    data = {
-        "author_id": str(client_id),
-        "author_username": str(username),
-        "original_text": str(original_text),
-        "translated_text": str(translated_text),
-        "timestamp": str(datetime.now())
-    }
-    gunicorn_logger.info("SENDING TRANSLATED TEXT TO CLIENT")
-    await sio.emit("translated_text", data, room=room_id)
-    gunicorn_logger.info("SUCCESSFULLY SEND AUDIO TO FRONTEND")
-@sio.on("connect")
-async def connect(sid, environ):
-    print(f"📥 [event: connected] sid={sid}")
-    query_params = dict(parse.parse_qsl(environ["QUERY_STRING"]))
-    client_id = query_params.get("client_id")
-    gunicorn_logger.info(f"📥 [event: connected] sid={sid}, client_id={client_id}")
-    # look up the username in the DB so it can be stored on the Client object
-    username = find_name_from_id(get_collection_users(), client_id)
-    # sid = socket id, client_id = client-specific ID, always the same for the same user
-    clients[sid] = Client(sid, client_id, username)
-    print(clients[sid].username)
-    gunicorn_logger.warning(f"Client connected: {sid}")
-    gunicorn_logger.warning(clients)
-@sio.on("disconnect")
-async def disconnect(sid):
-    gunicorn_logger.debug(f"📤 [event: disconnected] sid={sid}")
-    call_id = clients[sid].call_id
-    user_id = clients[sid].client_id
-    target_language = clients[sid].target_language
-    clients.pop(sid, None)
-    # Perform Key Term Extraction and summarisation
-    try:
-        # Get combined caption field for call record based on call_id
-        key_terms = term_extraction(get_collection_calls(), call_id, user_id, target_language)
-        # Perform summarisation based on target language
-        summary_result = summarise(get_collection_calls(), call_id, user_id, target_language)
-    except:
-        gunicorn_logger.error(f"📤 [event: term_extraction/summarisation request error] sid={sid}, call={call_id}")
-@sio.on("target_language")
-async def target_language(sid, target_lang):
-    gunicorn_logger.info(f"📥 [event: target_language] sid={sid}, target_lang={target_lang}")
-    clients[sid].target_language = target_lang
-@sio.on("call_user")
-async def call_user(sid, call_id):
-    clients[sid].call_id = call_id
-    gunicorn_logger.info(f"CALL {sid}: entering room {call_id}")
-    rooms[call_id] = rooms.get(call_id, [])
-    if sid not in rooms[call_id] and len(rooms[call_id]) < 2:
-        rooms[call_id].append(sid)
-        sio.enter_room(sid, call_id)
-    else:
-        gunicorn_logger.info(f"CALL {sid}: room {call_id} is full")
-        # await sio.emit("room_full", room=call_id, to=sid)
-    # BO - Get client id from the clients dictionary populated during socketio connection
-    client_id = clients[sid].client_id
-    gunicorn_logger.warning(f"NOW TRYING TO CREATE DB RECORD FOR Caller with ID: {client_id} for call: {call_id}")
-    # BO -> Create Call Record with Caller and call_id field (None for callee, duration, terms..)
-    request_data = {
-        "call_id": str(call_id),
-        "caller_id": str(client_id),
-        "creation_date": str(datetime.now())
-    }
-    response =  create_calls(get_collection_calls(), request_data)
-    print(response) # BO - print created db call record
-@sio.on("audio_config")
-async def audio_config(sid, sample_rate):
-    clients[sid].original_sr = sample_rate
-@sio.on("answer_call")
-async def answer_call(sid, call_id):
-    clients[sid].call_id = call_id
-    gunicorn_logger.info(f"ANSWER {sid}: entering room {call_id}")
-    rooms[call_id] = rooms.get(call_id, [])
-    if sid not in rooms[call_id] and len(rooms[call_id]) < 2:
-        rooms[call_id].append(sid)
-        sio.enter_room(sid, call_id)
-    else:
-        gunicorn_logger.info(f"ANSWER {sid}: room {call_id} is full")
-        # await sio.emit("room_full", room=call_id, to=sid)
-    # BO - Get client id from the clients dictionary populated during socketio connection
-    client_id = clients[sid].client_id
-    # BO -> Update Call Record with Callee field based on call_id
-    gunicorn_logger.warning(f"NOW UPDATING MongoDB RECORD FOR Caller with ID: {client_id} for call: {call_id}")
-    # BO -> Build the update payload containing only the callee_id field
-    request_data = {
-        "callee_id": client_id
-    }
-    response =  update_calls(get_collection_calls(), call_id, request_data)
-    print(response) # BO - print created db call record
-@sio.on("incoming_audio")
-async def incoming_audio(sid, data, call_id):
-    try:
-        clients[sid].add_bytes(data)
-        if clients[sid].get_length() >= MAX_BYTES_BUFFER:
-            gunicorn_logger.info('Buffer full, now outputting...')
-            output_path = clients[sid].output_path
-            resampled_audio = clients[sid].resample_and_clear()
-            vad_result = clients[sid].vad_analyse(resampled_audio)
-            # source lang is the speaker's own target language 😃
-            src_lang = clients[sid].target_language
-            if vad_result:
-                gunicorn_logger.info('Speech detected, now processing audio.....')
-                tgt_sid = next(id for id in rooms[call_id] if id != sid)
-                tgt_lang = clients[tgt_sid].target_language
-                # following example from https://github.com/facebookresearch/seamless_communication/blob/main/docs/m4t/README.md#transformers-usage
-                output_tokens = processor(audios=resampled_audio, src_lang=src_lang, return_tensors="pt", sampling_rate=TARGET_SAMPLING_RATE).to(device)
-                model_output = model.generate(**output_tokens, tgt_lang=src_lang, generate_speech=False)[0].tolist()[0]
-                asr_text = processor.decode(model_output, skip_special_tokens=True)
-                print(f"ASR TEXT = {asr_text}")
-                # ASR TEXT => ORIGINAL TEXT
-                if src_lang != tgt_lang:
-                    t2t_tokens = processor(text=asr_text, src_lang=src_lang, tgt_lang=tgt_lang, return_tensors="pt").to(device)
-                    translated_data = model.generate(**t2t_tokens, tgt_lang=tgt_lang, generate_speech=False)[0].tolist()[0]
-                    translated_text = processor.decode(translated_data, skip_special_tokens=True)
-                    print(f"TRANSLATED TEXT = {translated_text}")
-                else:
-                    # PM - both users have same language selected, no need to translate
-                    translated_text = asr_text
-                # PM - text_output is a list with 1 string
-                await send_translated_text(clients[sid].client_id, clients[sid].username, asr_text, translated_text, call_id)
-                # BO -> send translated_text to mongodb as caption record update based on call_id
-                await send_captions(clients[sid].client_id, clients[sid].username, asr_text, translated_text, call_id)
-    except Exception as e:
-        gunicorn_logger.error(f"Error in incoming_audio: {e.with_traceback()}")
-async def send_captions(client_id, username, original_text, translated_text, call_id):
-    # BO -> Update the captions of the Call Record identified by call_id
-    print(f"Now updating Caption field in call record for Caller with ID: {client_id} for call: {call_id}")
-    data = {
-        "author_id": str(client_id),
-        "author_username": str(username),
-        "original_text": str(original_text),
-        "translated_text": str(translated_text),
-        "timestamp": str(datetime.now())
-    }
-    response = update_captions(get_collection_calls(), get_collection_users(), call_id, data)
-    return response
-app.mount("/", socketio_app)
-if __name__ == '__main__':
-    uvicorn.run("main:app", host='0.0.0.0', port=7860, log_level="info")
-# Running in Docker Container
-if __name__ != "__main__":
-    fastapi_logger.setLevel(gunicorn_logger.level)
-else:
-    fastapi_logger.setLevel(logging.DEBUG)

backend/models/Seamless/vad_s2st_sc_24khz_main.yaml DELETED Viewed

@@ -1,25 +0,0 @@
-agent_class: seamless_communication.streaming.agents.seamless_s2st.SeamlessS2STDualVocoderVADAgent
-monotonic_decoder_model_name: seamless_streaming_monotonic_decoder
-unity_model_name: seamless_streaming_unity
-sentencepiece_model: spm_256k_nllb100.model
-task: s2st
-tgt_lang: "eng"
-min_unit_chunk_size: 50
-decision_threshold: 0.7
-no_early_stop: True
-block_ngrams: True
-vocoder_name: vocoder_v2
-expr_vocoder_name: vocoder_pretssel
-gated_model_dir: .
-expr_vocoder_gain: 3.0
-upstream_idx: 1
-wav2vec_yaml: wav2vec.yaml
-min_starting_wait_w2vbert: 192
-config_yaml: cfg_fbank_u2t.yaml
-upstream_idx: 1
-detokenize_only: True
-device: cuda:0
-max_len_a: 0
-max_len_b: 1000

backend/models/SeamlessStreaming/vad_s2st_sc_main.yaml DELETED Viewed

@@ -1,21 +0,0 @@
-agent_class: seamless_communication.streaming.agents.seamless_streaming_s2st.SeamlessStreamingS2STJointVADAgent
-monotonic_decoder_model_name: seamless_streaming_monotonic_decoder
-unity_model_name: seamless_streaming_unity
-sentencepiece_model: spm_256k_nllb100.model
-task: s2st
-tgt_lang: "eng"
-min_unit_chunk_size: 50
-decision_threshold: 0.7
-no_early_stop: True
-block_ngrams: True
-vocoder_name: vocoder_v2
-wav2vec_yaml: wav2vec.yaml
-min_starting_wait_w2vbert: 192
-config_yaml: cfg_fbank_u2t.yaml
-upstream_idx: 1
-detokenize_only: True
-device: cuda:0
-max_len_a: 0
-max_len_b: 1000

backend/mongodb/endpoints/__pycache__/calls.cpython-310.pyc DELETED Viewed

Binary file (4.74 kB)

backend/mongodb/endpoints/__pycache__/users.cpython-310.pyc DELETED Viewed

Binary file (2.43 kB)

backend/mongodb/endpoints/calls.py DELETED Viewed

@@ -1,96 +0,0 @@
-from fastapi import APIRouter, Body, Request, status, HTTPException
-from typing import List
-from datetime import date
-import sys
-from ..operations import calls as calls
-from ..models.calls import UserCaptions, UserCall, UpdateCall
-from ..endpoints.users import get_collection_users
-router = APIRouter(prefix="/call",
-    tags=["Calls"])
-def get_collection_calls(request: Request):
-  try:
-    return request.app.database["call_records"]
-    #   return request.app.database["call_test"]
-  except:
-      raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Unable to find call records Database.")
-@router.post("/create-call", response_description="Create a new user call record", status_code=status.HTTP_201_CREATED, response_model=UserCall)
-async def create_calls(request: Request, user_calls: UserCall = Body(...)):
-    collection = get_collection_calls(request)
-    return calls.create_calls(collection, user_calls)
-@router.get("/list-call", response_description="List all existing call records", response_model=List[UserCall])
-async def list_calls(request: Request, limit: int):
-    collection = get_collection_calls(request)
-    return calls.list_calls(collection, 100)
-@router.get("/find-call/{call_id}", response_description="Find user's calls based on User ID", response_model=UserCall)
-async def find_call(request: Request, call_id: str):
-    collection = get_collection_calls(request)
-    return calls.find_call(collection, call_id)
-@router.get("/find-user-calls/{user_id}", response_description="Find user's calls based on User ID", response_model=List[UserCall])
-async def find_user_calls(request: Request, user_id: str):
-    collection = get_collection_calls(request)
-    return calls.find_user_calls(collection, user_id)
-@router.get("/get-captions/{user_id}", response_description="Find user's calls based on User ID")
-async def get_caption_text(request: Request, call_id: str, user_id: str):
-    collection = get_collection_calls(request)
-    return calls.get_caption_text(collection, call_id, user_id)
-'''Key terms list can have variable length -> using POST request over GET'''
-@router.post("/find-term/", response_description="Find calls based on key term list", response_model=List[UserCall])
-async def list_transcripts_by_key_terms(request: Request, key_terms: List[str]):
-    collection = get_collection_calls(request)
-    return calls.list_transcripts_by_key_terms(collection, key_terms)
-@router.get("/find-date/{start_date}/{end_date}", response_description="Find calls based on date ranges", response_model=List[UserCall])
-async def list_transcripts_by_dates(request: Request, start_date: str, end_date: str):
-    collection = get_collection_calls(request)
-    return calls.list_transcripts_by_dates(collection, start_date, end_date)
-@router.get("/find-duration/{min_len}/{max_len}", response_description="Find calls based on call duration in minutes", response_model=List[UserCall])
-async def list_transcripts_by_duration(request: Request, min_len: int, max_len: int):
-    collection = get_collection_calls(request)
-    return calls.list_transcripts_by_duration(collection, min_len, max_len)
-@router.put("/update-call/{call_id}", response_description="Update an existing call", response_model=UpdateCall)
-async def update_calls(request: Request, call_id: str, user_calls: UpdateCall = Body(...)):
-    collection = get_collection_calls(request)
-    return calls.update_calls(collection, call_id, user_calls)
-@router.put("/update-captions/{call_id}", response_description="Update an existing call", response_model=UpdateCall)
-async def update_captions(request: Request, call_id: str, user_calls: UserCaptions = Body(...)):
-    call_collection = get_collection_calls(request)
-    user_collection = get_collection_users(request)
-    return calls.update_captions(call_collection, user_collection, call_id, user_calls)
-@router.delete("/delete-call/{call_id}", response_description="Delete a call by its id")
-async def delete_call(request: Request, call_id: str):
-    collection = get_collection_calls(request)
-    return calls.delete_calls(collection, call_id)
-@router.get("/full-text-search/{query}", response_description="Perform full text search on caption fields", response_model=List[UserCall])
-async def full_text_search(request: Request, query: str):
-    collection = get_collection_calls(request)
-    return calls.full_text_search(collection, query)
-@router.get("/fuzzy-search/{user_id}/{query}", response_description="Perform fuzzy text search on caption fields", response_model=List[UserCall])
-async def fuzzy_search(request: Request, user_id: str, query: str):
-    collection = get_collection_calls(request)
-    return calls.fuzzy_search(collection, user_id, query)
-@router.get("/summarise/{call_id}/{user_id}/{target_language}", response_description="Perform gpt-3.5 summarisation on call_id")
-async def summarise(request: Request, call_id: str, user_id: str, target_language: str):
-    collection = get_collection_calls(request)
-    return calls.summarise(collection, call_id, user_id, target_language)
-@router.get("/term-extraction/{call_id}/{user_id}/{target_language}", response_description="Perform key term extraction on call record")
-async def term_extraction(request: Request, call_id: str, user_id: str, target_language: str):
-    collection = get_collection_calls(request)
-    return calls.term_extraction(collection, call_id, user_id, target_language)

backend/mongodb/endpoints/users.py DELETED Viewed

@@ -1,53 +0,0 @@
-from fastapi import APIRouter, Body, Request, status, HTTPException
-from typing import List
-import sys
-from ..models.users import User, UpdateUser
-from ..operations import users as users
-router = APIRouter(prefix="/user",
-    tags=["User"])
-def get_collection_users(request: Request):
-  db = request.app.database["user_records"]
-  return db
-@router.post("/", response_description="Create a new user", status_code=status.HTTP_201_CREATED, response_model=User)
-async def create_user(request: Request, user: User = Body(...)):
-    collection = get_collection_users(request)
-    return users.create_user(collection, user)
-@router.get("/", response_description="List users", response_model=List[User])
-async def list_users(request: Request):
-    collection = get_collection_users(request)
-    return users.list_users(collection, 100)
-@router.put("/{user_id}", response_description="Update a User", response_model=UpdateUser)
-async def update_user(request: Request, user_id: str, user: UpdateUser = Body(...)):
-    collection = get_collection_users(request)
-    return users.update_user(collection, user_id, user)
-@router.get("/{user_id}", response_description="Get a single user by id", response_model=User)
-async def find_user(request: Request, user_id: str):
-    collection = get_collection_users(request)
-    return users.find_user(collection, user_id)
-@router.get("/find-name-id/{user_id}", response_description="Get a username from user id")
-async def find_name_from_id(request: Request, user_id: str):
-    collection = get_collection_users(request)
-    return users.find_name_from_id(collection, user_id)
-@router.get("/name/{user_name}", response_description="Get a single user by name", response_model=User)
-async def find_user_name(request: Request, name: str):
-    collection = get_collection_users(request)
-    return users.find_user_name(collection, name)
-@router.get("/email/{email_addr}", response_description="Get a single user by email", response_model=User)
-async def find_user_email(request: Request, email: str):
-    collection = get_collection_users(request)
-    return users.find_user_email(collection, email)
-@router.delete("/{user_id}", response_description="Delete a user")
-async def delete_user(request: Request, user_id:str):
-    collection = get_collection_users(request)
-    return users.delete_user(collection, user_id)

backend/mongodb/models/__pycache__/calls.cpython-310.pyc DELETED Viewed

Binary file (3.09 kB)

backend/mongodb/models/__pycache__/users.cpython-310.pyc DELETED Viewed

Binary file (1.73 kB)

backend/mongodb/models/calls.py DELETED Viewed

@@ -1,75 +0,0 @@
-import uuid
-from typing import List, Dict, Optional
-from datetime import datetime
-from pydantic import BaseModel, Field, PrivateAttr
-import sys
-''' Class for storing captions generated by SeamlessM4T'''
-class UserCaptions(BaseModel):
-    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4) # private attr not included in http calls
-    author_id: Optional[str] = None
-    author_username: Optional[str] = None
-    original_text: str
-    translated_text: str
-    timestamp: datetime = Field(default_factory=datetime.now)
-    class Config:
-        populate_by_name = True
-        json_schema_extra = {
-            "example": {
-                "author_id": "gLZrfTwXyLUPB3eT7xT2HZnZiZT2",
-                "author_username": "shamzino",
-                "original_text": "eng: This is original_text english text",
-                "translated_text": "spa: este es el texto traducido al español",
-                "timestamp": "2024-03-28T16:15:50.956055",
-            }
-        }
-'''Class for storing past call records from users'''
-class UserCall(BaseModel):
-    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4)
-    call_id: Optional[str] = None
-    caller_id: Optional[str] = None
-    callee_id: Optional[str] = None
-    creation_date: datetime = Field(default_factory=datetime.now, alias="date")
-    duration: Optional[int] = None # milliseconds
-    captions: Optional[List[UserCaptions]] = None
-    key_terms: Optional[dict] = None
-    summaries: Optional[dict] = None
-    class Config:
-        populate_by_name = True
-        json_schema_extra = {
-            "example": {
-                "call_id": "65eef930e9abd3b1e3506906",
-                "caller_id": "65ede65b6d246e52aaba9d4f",
-                "callee_id": "65edda944340ac84c1f00758",
-                "duration": 360,
-                "captions": [{"author_id": "gLZrfTwXyLUPB3eT7xT2HZnZiZT2", "author_username": "shamzino", "original_text": "eng: This is original_text english text", "translated_text": "spa: este es el texto traducido al español", "timestamp": "2024-03-28T16:15:50.956055"},
-                             {"author_id": "g7pR1qCibzQf5mDP9dGtcoWeEc92", "author_username": "benjino", "original_text": "eng: This is source english text", "translated_text": "spa: este es el texto fuente al español",  "timestamp": "2024-03-28T16:16:20.34625"}],
-                "key_terms": {"gLZrfTwXyLUPB3eT7xT2HZnZiZT2": ["original_text", "source", "english", "text"], "g7pR1qCibzQf5mDP9dGtcoWeEc92": ["translated_text", "destination", "spanish", "text"]},
-                "summaries": {"gLZrfTwXyLUPB3eT7xT2HZnZiZT2": "This is a short test on lanuguage translation", "65edda944340ac84c1f00758": "Esta es una breve prueba sobre traducción de idiomas."}
-            }
-        }
-''' Class for updating User Call record'''
-class UpdateCall(BaseModel):
-    call_id: Optional[str] = None
-    caller_id: Optional[str] = None
-    callee_id: Optional[str] = None
-    duration: Optional[int] = None
-    captions: Optional[List[UserCaptions]] = None
-    key_terms: Optional[List[str]] = None
-    class Config:
-        populate_by_name = True
-        json_schema_extra = {
-            "example": {
-                "duration": "500"
-            }
-        }

backend/mongodb/models/users.py DELETED Viewed

@@ -1,44 +0,0 @@
-import uuid
-from typing import List, Optional
-from pydantic import BaseModel, Field, SecretStr, PrivateAttr
-from pydantic.networks import EmailStr
-class User(BaseModel):
-    """User model used to relate users to past calls."""
-    # Private attribute: not a model field, so it is not included in HTTP payloads.
-    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4)
-    user_id: str
-    name: str
-    # Pydantic v2 deprecates arbitrary keyword arguments on Field(); the same
-    # metadata is passed through json_schema_extra instead.
-    # NOTE(review): this is schema metadata only - uniqueness/indexing would have
-    # to be enforced by the database; confirm an index exists there.
-    email: EmailStr = Field(json_schema_extra={"unique": True, "index": True})
-    # password: SecretStr
-    call_ids: Optional[List[str]] = None
-    class Config:
-        populate_by_name = True
-        json_schema_extra = {
-            "example": {
-                "user_id": "65ede65b6d246e52aaba9d4f",
-                "name": "benjolo",
-                "email": "benjolounchained@gmail.com",
-                "call_ids": ["65e205ced1be3a22854ff300", "65df8c3eba9c7c2ed1b20e85"]
-            }
-        }
-# Payload model for updating user records; every field is optional.
-class UpdateUser(BaseModel):
-    user_id: Optional[str] = None
-    name: Optional[str] = None
-    email: Optional[EmailStr] = None
-    # To decode a secret use -> SecretStr("abc").get_secret_value()
-    # password: Optional[SecretStr]
-    call_ids: Optional[List[str]] = None
-    class Config:
-        populate_by_name = True
-        json_schema_extra = {
-            "example": {
-                "email": "benjolounchained21@gmail.com",
-                "call_ids": [
-                    "65e205ced1be3a22854ff300",
-                    "65df8c3eba9c7c2ed1b20e85",
-                    "65eef930e9abd3b1e3506906",
-                ],
-            },
-        }

backend/mongodb/operations/__pycache__/calls.cpython-310.pyc DELETED Viewed

Binary file (6.61 kB)

backend/mongodb/operations/__pycache__/users.cpython-310.pyc DELETED Viewed

Binary file (2.93 kB)

backend/mongodb/operations/calls.py DELETED Viewed

@@ -1,280 +0,0 @@
-from fastapi import Body, Request, HTTPException, status
-from fastapi.encoders import jsonable_encoder
-import sys
-from ..models.calls import UpdateCall, UserCall, UserCaptions
-from ..operations.users import *
-from utils.text_rank import extract_terms
-from openai import OpenAI
-from time import sleep
-import os
-from dotenv import dotenv_values
-# Used within calls to create call record in main.py
-def create_calls(collection, user: UserCall = Body(...)):
-    """Insert a call record into ``collection`` and return the stored document.
-    The payload is JSON-encoded before insertion, then read back by the
-    ``_id`` the insert produced.
-    """
-    insert_result = collection.insert_one(jsonable_encoder(user))
-    return collection.find_one({"_id": insert_result.inserted_id})
-def list_calls(collection, limit: int):
-    """Return up to ``limit`` call documents from ``collection`` as a list.
-    Raises:
-        HTTPException: 404 if the query fails for any reason; the original
-            error is chained as the cause.
-    """
-    try:
-        return list(collection.find(limit=limit))
-    except Exception as err:
-        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No existing call records yet.") from err
-def find_call(collection, call_id: str):
-    """Find a call by its call id.
-    Returns the matching document, or raises a 404 HTTPException when no
-    document has that ``call_id``.
-    """
-    record = collection.find_one({"call_id": call_id})
-    if record is None:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call with ID: '{call_id}' not found.")
-    return record
-def find_user_calls(collection, user_id: str):
-    """Find every call in which ``user_id`` is either the caller or the callee.
-    An empty list (not an error) is returned when the user has no calls, for
-    the TranscriptView frontend component.
-    """
-    participant_filter = {"$or": [{"caller_id": user_id}, {"callee_id": user_id}]}
-    return list(collection.find(participant_filter))
-def list_transcripts_by_key_terms(collection, key_terms_list: list[str] = Body(...)):
-    """Find calls whose ``key_terms`` field matches any term in ``key_terms_list``.
-    Raises a 404 HTTPException when nothing matches.
-    """
-    key_terms_list = jsonable_encoder(key_terms_list)
-    # NOTE(review): term_extraction stores terms under "key_terms.<user_id>", so a
-    # top-level "$in" on "key_terms" may never match - confirm the stored shape.
-    term_filter = {"key_terms": {"$in": key_terms_list}}
-    # The {'_id': 0} projection keeps the ObjectID out of the returned documents.
-    matches = list(collection.find(term_filter, {'_id': 0}))
-    if not matches:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call with key terms: '{key_terms_list}' not found!")
-    return matches
-def list_transcripts_by_dates(collection, start_date: str, end_date: str):
-    """Find calls whose ``date`` field lies between two dates (string comparison).
-    Raises a 404 HTTPException when no call falls inside the range.
-    """
-    # Expand both bounds to the YYYY-MM-ddT00:00:00 form before querying.
-    # NOTE(review): the upper bound is midnight at the START of end_date, so calls
-    # made later on end_date do not match - confirm this is intended.
-    start_date, end_date = f'{start_date}T00:00:00', f'{end_date}T00:00:00'
-    date_window = {"$gte": start_date, "$lte": end_date}
-    matches = list(collection.find({"date": date_window}))
-    if not matches:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call with creation date between: '{start_date} - {end_date}' not found!")
-    return matches
-def list_transcripts_by_duration(collection, min_len: int, max_len: int):
-    """Find calls whose ``duration`` lies in the inclusive range [min_len, max_len].
-    Raises a 404 HTTPException when no call matches.
-    """
-    duration_window = {"$gte": min_len, "$lte": max_len}
-    matches = list(collection.find({"duration": duration_window}))
-    if not matches:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call with duration between: '{min_len} - {max_len}' milliseconds not found!")
-    return matches
-def update_calls(collection, call_id: str, calls: UpdateCall = Body(...)):
-    """Apply the non-``None`` fields of ``calls`` to the call with ``call_id``.
-    Args:
-        collection: collection holding the call documents.
-        call_id: value of the ``call_id`` field of the document to update.
-        calls: an ``UpdateCall`` model or a plain mapping of field -> value.
-    Returns:
-        The call document as stored after the update.
-    Raises:
-        HTTPException: 404 if the update modified nothing, or if no document
-            with ``call_id`` exists.
-    """
-    # Accept both a pydantic model (the annotated type) and a plain dict;
-    # calling .items() directly only works for the latter.
-    raw_fields = calls.model_dump() if hasattr(calls, "model_dump") else dict(calls)
-    changes = {k: v for k, v in raw_fields.items() if v is not None}
-    if changes:
-        update_result = collection.update_one({"call_id": call_id}, {"$set": changes})
-        if update_result.modified_count == 0:
-            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")
-    if (existing_item := collection.find_one({"call_id": call_id})) is not None:
-        return existing_item
-    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")
-def update_captions(call_collection, user_collection, call_id: str, captions: UserCaptions = Body(...)):
-    """Append one caption to a call, stamping it with the author's user name.
-    Args:
-        call_collection: collection holding the call documents.
-        user_collection: collection used to resolve ``author_id`` to a name.
-        call_id: value of the ``call_id`` field of the call to extend.
-        captions: a ``UserCaptions`` model or plain mapping; must carry ``author_id``.
-    Returns:
-        The call document as stored after the caption was pushed.
-    Raises:
-        HTTPException: 404 if the author is unknown, the push modified
-            nothing, or the call cannot be found afterwards.
-    """
-    # Accept both a pydantic model (the annotated type) and a plain dict.
-    raw_fields = captions.model_dump() if hasattr(captions, "model_dump") else dict(captions)
-    caption = {k: v for k, v in raw_fields.items() if v is not None}
-    # Resolve the author's user name from their id and attach it to the caption.
-    caption["author_username"] = find_name_from_id(user_collection, caption["author_id"])
-    update_result = call_collection.update_one({"call_id": call_id},
-                                               {"$push": {"captions": caption}})
-    if update_result.modified_count == 0:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not updated!")
-    if (existing_item := call_collection.find_one({"call_id": call_id})) is not None:
-        return existing_item
-    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not found!")
-def delete_calls(collection, call_id: str):
-    """Delete the call with ``call_id``.
-    Returns a confirmation string, or raises a 404 HTTPException when exactly
-    one document was not deleted.
-    """
-    outcome = collection.delete_one({"call_id": call_id})
-    if outcome.deleted_count != 1:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")
-    return "Call deleted sucessfully!"
-# def get_caption_text(collection, call_id):
-#     call_record = find_call((collection), call_id)
-#     try: # Check if call has any captions first
-#         caption_records = call_record['captions']
-#     except KeyError:
-#         return None
-#     # iterate through caption embedded document and store original text
-#     combined_text = [caption['original_text'] for caption in caption_records]
-#     return " ".join(combined_text)
-def get_caption_text(collection, call_id, user_id):
-    """Build one transcript string for a call from ``user_id``'s point of view.
-    Captions authored by ``user_id`` contribute their ``original_text``; all
-    other captions contribute their ``translated_text``. Segments are joined
-    with single spaces.
-    Returns:
-        The joined text, or ``None`` when the call has no captions field or the
-        field is null.
-    Raises:
-        HTTPException: 404 (from ``find_call``) if the call does not exist.
-    """
-    call_record = find_call(collection, call_id)
-    # The captions field may be absent OR stored as null (the call model
-    # defaults it to None); both mean "no captions yet".
-    caption_records = call_record.get('captions')
-    if caption_records is None:
-        return None
-    return " ".join(
-        segment['original_text'] if segment['author_id'] == user_id else segment['translated_text']
-        for segment in caption_records
-    )
-# standard exact match based full text search
-def full_text_search(collection, query):
-    """Run a MongoDB ``$text`` search for ``query`` over the caption texts.
-    A text index named ``captions`` covering the original and translated
-    caption text is (re)created before the query runs.
-    Returns:
-        List of matching call documents (possibly empty).
-    """
-    # Drop any existing indexes and create a new one.
-    # NOTE(review): this rebuilds the index on every search and also drops any
-    # unrelated indexes on the collection - consider creating it once at startup.
-    collection.drop_indexes()
-    # Field name fixed: 'captions.translated_text' (was misspelled 'tranlated_text',
-    # which left translated captions out of the index).
-    collection.create_index([('captions.original_text', 'text'), ('captions.translated_text', 'text')],
-                            name='captions')
-    return list(collection.find({"$text": {"$search": query}}))
-# approximate string matching
-def fuzzy_search(collection, user_id, query):
-    """Fuzzy-search call records for ``query`` and keep only ``user_id``'s calls.
-    Returns:
-        List of matching call documents in which ``user_id`` is the caller or
-        the callee (possibly empty).
-    """
-    # Drop any existing indexes and create a new one.
-    # NOTE(review): rebuilding on every search is expensive and drops unrelated indexes.
-    collection.drop_indexes()
-    # Field name fixed: 'captions.translated_text' (was misspelled 'tranlated_text').
-    collection.create_index([('captions.original_text', 'text'), ('captions.translated_text', 'text')],
-                            name='captions')
-    # NOTE(review): the "$search" aggregation stage looks like Atlas Search, which
-    # uses its own search index rather than the text index above - confirm deployment.
-    pipeline = [
-        {
-            "$search": {
-                "text": {
-                    "query": query,
-                    "path": {"wildcard": "*"},
-                    "fuzzy": {}
-                }
-            }
-        }
-    ]
-    # Keep only the records the requesting user took part in.
-    return [
-        doc for doc in collection.aggregate(pipeline)
-        if doc['caller_id'] == user_id or doc['callee_id'] == user_id
-    ]
-def summarise(collection, call_id, user_id, target_language):
-    """Summarise a call transcript with OpenAI and store the summary on the call.
-    The transcript is built from ``user_id``'s point of view, summarised in
-    ``target_language`` and written to ``summaries.<user_id>`` on the call document.
-    Returns:
-        The summary text, or ``None`` when the call has no caption text or the
-        model reply could not be parsed.
-    Raises:
-        HTTPException: 404 if the call does not exist or the summary write
-            modified nothing.
-    """
-    # client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
-    config = dotenv_values(".env")
-    client = OpenAI(api_key=config["OPENAI_API_KEY"])
-    # get caption text using call_id
-    caption_text = get_caption_text(collection, call_id, user_id)
-    if not caption_text:
-        # No captions: avoid sending the literal text "None" to the model.
-        return None
-    # Built by implicit concatenation so no source indentation or stray
-    # backslash (the old invalid "\C" escape) leaks into the prompt.
-    prompt = (
-        f"The following is an extract from a call transcript. Rewrite this as a structured, clear summary in {target_language}. "
-        f"\nCall Transcript: \"\"\"\n{caption_text}\n\"\"\"\n"
-    )
-    chat_completion = client.chat.completions.create(
-        messages=[{"role": "user", "content": prompt}],
-        model="gpt-3.5-turbo",
-    )
-    # Gpt-3.5 turbo has 4096 token limit -> request will fail if exceeded
-    try:
-        # The reply is expected as "<label>: <summary>"; split once so colons
-        # inside the summary itself are kept.
-        result = chat_completion.choices[0].message.content.split(":", 1)[1].strip()
-    except (AttributeError, IndexError):
-        # Empty choices, null content, or no "<label>:" prefix in the reply.
-        return None
-    # BO - add result to mongodb -> should be done asynchronously
-    update_result = collection.update_one({"call_id": call_id}, {"$set": {f"summaries.{user_id}": result}})
-    if update_result.modified_count == 0:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")
-    return result
-def term_extraction(collection, call_id, user_id, target_language):
-    """Extract key terms from a call transcript and store them on the call.
-    Terms are written to ``key_terms.<user_id>`` on the call document.
-    Returns:
-        The extracted key terms, or an empty list when the transcript is
-        missing or too short (50 characters or fewer) to extract from.
-    Raises:
-        HTTPException: 404 if the call does not exist or the write modified nothing.
-    """
-    combined_text = get_caption_text(collection, call_id, user_id)
-    # Term extraction is poor on short transcripts, so skip them. Returning here
-    # also avoids using update_result/key_terms before they are assigned.
-    if combined_text is None or len(combined_text) <= 50:
-        return []
-    # Extract Key Terms from Concatenated Caption Field
-    key_terms = extract_terms(combined_text, target_language, len(combined_text))
-    update_result = collection.update_one({"call_id": call_id}, {"$set": {f"key_terms.{user_id}": key_terms}})
-    if update_result.modified_count == 0:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")
-    return key_terms

backend/mongodb/operations/users.py DELETED Viewed

@@ -1,77 +0,0 @@
-from fastapi import Body, Request, HTTPException, status
-from fastapi.encoders import jsonable_encoder
-import sys
-from ..models.users import User, UpdateUser
-from bson import ObjectId
-import re
-def create_user(collection, user: User = Body(...)):
-    """Insert a user record and return the stored document.
-    The payload is JSON-encoded before insertion, read back by the ``_id`` the
-    insert produced, and the new id is printed to stdout.
-    """
-    insert_result = collection.insert_one(jsonable_encoder(user))
-    stored_user = collection.find_one({"_id": insert_result.inserted_id})
-    print("NEW ID IS:.........", insert_result.inserted_id)
-    return stored_user
-def list_users(collection, limit: int):
-    """Return up to ``limit`` user documents from ``collection`` as a list.
-    Raises:
-        HTTPException: 404 if the query fails for any reason; the original
-            error is chained as the cause.
-    """
-    try:
-        return list(collection.find(limit=limit))
-    except Exception as err:
-        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No users found!") from err
-def find_user(collection, user_id: str):
-    """Return the user document with ``user_id``, or raise a 404 HTTPException."""
-    record = collection.find_one({"user_id": user_id})
-    if not record:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")
-    return record
-def find_name_from_id(collection, user_id: str):
-    """Return the ``name`` of the user with ``user_id``, or raise a 404 HTTPException."""
-    # Project only the name field (and suppress _id) from the matching record.
-    name_only = collection.find_one({"user_id": user_id}, {"name": 1, "_id": 0})
-    if not name_only:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")
-    return name_only['name']
-def find_user_name(collection, name: str):
-    """Return the user whose ``name`` equals ``name`` ignoring case, or raise a 404 HTTPException."""
-    # Anchored, escaped pattern: whole-string match, case-insensitive.
-    exact_name = re.compile('^' + re.escape(name) + '$', re.IGNORECASE)
-    record = collection.find_one({"name": exact_name})
-    if not record:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with name {name} not found!")
-    return record
-def find_user_email(collection, email: str):
-    """Return the user whose ``email`` equals ``email`` ignoring case, or raise a 404 HTTPException."""
-    # Anchored, escaped pattern: whole-string match, case-insensitive.
-    exact_email = re.compile('^' + re.escape(email) + '$', re.IGNORECASE)
-    record = collection.find_one({"email": exact_email})
-    if not record:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with Email Address {email} not found!")
-    return record
-def update_user(collection, user_id: str, user: UpdateUser):
-    """Update a user record from the non-``None`` fields of ``user``.
-    Returns:
-        The user document as stored after the update.
-    Raises:
-        HTTPException: 404 if the update modified nothing, the user does not
-            exist, or the database operation failed.
-    """
-    detail = f"User with user_id: '{user_id}' not found and updated!"
-    try:
-        changes = {k: v for k, v in user.model_dump().items() if v is not None}
-        if changes:
-            update_result = collection.update_one({"user_id": user_id}, {"$set": changes})
-            if update_result.modified_count == 0:
-                raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=detail)
-        existing_user = collection.find_one({"user_id": user_id})
-    except HTTPException:
-        raise
-    except Exception as err:
-        # Any other failure is reported as the same 404, with the cause chained.
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=detail) from err
-    if existing_user is None:
-        # Without this check a missing user fell through and returned None.
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=detail)
-    return existing_user
-def delete_user(collection, user_id: str):
-    """Delete the user with ``user_id``.
-    Returns:
-        A confirmation string when exactly one document was deleted.
-    Raises:
-        HTTPException: 404 if no user was deleted or the delete operation failed.
-    """
-    detail = f"User with user_id {user_id} not found!"
-    try:
-        deleted_user = collection.delete_one({"user_id": user_id})
-    except Exception as err:
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=detail) from err
-    if deleted_user.deleted_count != 1:
-        # Nothing matched: report 404 instead of silently returning None.
-        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=detail)
-    return f"User with user_id {user_id} deleted sucessfully"

backend/pcmToWav.py DELETED Viewed

@@ -1,34 +0,0 @@
-import wave
-import os
-basePath = os.path.expanduser("~/Desktop/")
-def convert_pcm_to_wav(pcm_file=None, wav_file='pcmconverted.wav', sample_rate=16000, channels=1, sample_width=2):
-    """Wrap raw PCM audio in a WAV container.
-    The format parameters must match those used to create the PCM file.
-    Args:
-        pcm_file: path of the raw PCM input; defaults to ``output.pcm`` under ``basePath``.
-        wav_file: path of the WAV file to write.
-        sample_rate: frames per second, e.g. 16000 Hz.
-        channels: number of channels (1 = mono, 2 = stereo).
-        sample_width: bytes per sample, e.g. 2 for 16-bit audio.
-    Returns:
-        Tuple ``(pcm_file, wav_file)`` of the paths actually used.
-    """
-    if pcm_file is None:
-        pcm_file = basePath + 'output.pcm'
-    # Read the PCM file and write to a WAV file
-    with open(pcm_file, 'rb') as pcmfile:
-        pcm_data = pcmfile.read()
-    with wave.open(wav_file, 'wb') as wavfile:
-        wavfile.setnchannels(channels)
-        wavfile.setsampwidth(sample_width)
-        wavfile.setframerate(sample_rate)
-        wavfile.writeframes(pcm_data)
-    return pcm_file, wav_file
-# Capture the paths from the call: they are locals of the function, so the
-# module-level print below would otherwise raise NameError.
-pcm_file, wav_file = convert_pcm_to_wav()
-# def generateCaptions(filepath):
-# ! This might be redundant due to seamless-streaming
-print(f"Converted {pcm_file} to {wav_file}")

backend/preprocess_wav.py DELETED Viewed

@@ -1,65 +0,0 @@
-import soundfile
-import io
-from typing import Any, Tuple, Union, Optional
-import numpy as np
-import torch
-def preprocess_wav(data: Any, incoming_sample_rate) -> Tuple[np.ndarray, int]:
-    """Decode headerless single-channel PCM_16 bytes with ``soundfile``.
-    Args:
-        data: raw audio bytes (wrapped in an in-memory stream for reading).
-        incoming_sample_rate: sample rate to interpret the raw bytes with.
-    Returns:
-        ``(segment, sample_rate)`` exactly as returned by ``soundfile.read``;
-        the read requests float32 samples and an always-2D array.
-    """
-    raw_stream = io.BytesIO(data)
-    read_options = dict(
-        dtype="float32",
-        always_2d=True,
-        frames=-1,
-        start=0,
-        format="RAW",
-        subtype="PCM_16",
-        samplerate=incoming_sample_rate,
-        channels=1,
-    )
-    segment, sample_rate = soundfile.read(raw_stream, **read_options)
-    return segment, sample_rate
-def convert_waveform(
-        waveform: Union[np.ndarray, torch.Tensor],
-        sample_rate: int,
-        normalize_volume: bool = False,
-        to_mono: bool = False,
-        to_sample_rate: Optional[int] = None,
-    ) -> Tuple[Union[np.ndarray, torch.Tensor], int]:
-    """Optionally normalize, resample and down-mix a waveform via sox effects.
-    Requested effects are applied in this order: volume normalization
-    (``gain -n``), resampling (``rate``), mono down-mix (``channels 1``).
-    Args:
-        waveform (numpy.ndarray or torch.Tensor): 2D original waveform
-            (channels x length)
-        sample_rate (int): original sample rate
-        normalize_volume (bool): perform volume normalization
-        to_mono (bool): convert to mono channel if having multiple channels
-        to_sample_rate (Optional[int]): target sample rate
-    Returns:
-        waveform: converted 2D waveform (channels x length); a numpy array
-            when the input was a numpy array, otherwise whatever
-            ``apply_effects_tensor`` returns. The input object itself is
-            returned when no effect is needed.
-        sample_rate: sample rate of the returned waveform
-    Raises:
-        ImportError: if ``torchaudio`` is not installed.
-    """
-    try:
-        import torchaudio.sox_effects as ta_sox
-    except ImportError:
-        raise ImportError("Please install torchaudio: pip install torchaudio")
-    # (enabled?, sox effect) pairs, in application order.
-    candidate_effects = (
-        (normalize_volume, ["gain", "-n"]),
-        (to_sample_rate is not None and to_sample_rate != sample_rate, ["rate", f"{to_sample_rate}"]),
-        (to_mono and waveform.shape[0] > 1, ["channels", "1"]),
-    )
-    effects = [effect for enabled, effect in candidate_effects if enabled]
-    if not effects:
-        # Nothing to do: hand the input back untouched.
-        return waveform, sample_rate
-    came_in_as_numpy = isinstance(waveform, np.ndarray)
-    tensor_input = torch.from_numpy(waveform) if came_in_as_numpy else waveform
-    converted, converted_sample_rate = ta_sox.apply_effects_tensor(
-        tensor_input, sample_rate, effects
-    )
-    if came_in_as_numpy:
-        # Return the same container type the caller passed in.
-        converted = converted.numpy()
-    return converted, converted_sample_rate

backend/requirements.txt DELETED Viewed

@@ -1,28 +0,0 @@
-colorlog==6.8.2
-contextlib2==21.6.0
-fastapi==0.110.1
-g2p_en==2.1.0
-matplotlib==3.7.0
-numpy==1.24.2
-openai==1.20.0
-protobuf==5.26.1
-pydantic==2.7.0
-pydub==0.25.1
-pymongo==4.6.2
-PySoundFile==0.9.0.post1
-python-dotenv==1.0.1
-python-socketio==5.9.0
-pymongo==4.6.2
-Requests==2.31.0
-sentencepiece==0.1.99
-simuleval==1.1.4
-soundfile==0.12.1
-spacy==3.7.4
-pytextrank==3.3.0
-torch==2.1.2
-torchaudio==2.1.2
-#transformers==4.20.1
-uvicorn==0.29.0
-vad==1.0.2
-hf_transfer==0.1.4
-huggingface_hub==0.19.4

backend/routes/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- from.routing import router

backend/routes/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (235 Bytes)

backend/routes/__pycache__/routing.cpython-310.pyc DELETED Viewed

Binary file (375 Bytes)

backend/routes/routing.py DELETED Viewed

@@ -1,9 +0,0 @@
-from fastapi import APIRouter
-import sys
-# sys.path.append('/Users/benolojo/DCU/CA4/ca400_FinalYearProject/2024-ca400-olojob2-majdap2/src/backend/src/')
-from mongodb.endpoints import users, calls
-# Aggregate router: mounts the calls and users endpoint routers on one APIRouter.
-router = APIRouter()
-router.include_router(calls.router)
-router.include_router(users.router)
-# router.include_router(transcripts.router)

backend/seamless/__init__.py DELETED Viewed

File without changes

backend/seamless/room.py DELETED Viewed

@@ -1,64 +0,0 @@
-# import json
-import uuid
-class Room:
-    """A room identified by ``room_id`` that tracks its members, listeners and speakers."""
-    def __init__(self, room_id) -> None:
-        self.room_id = room_id
-        # members is a dict from client_id to Member
-        self.members = {}
-        # listeners and speakers are lists of client_id's
-        self.listeners = []
-        self.speakers = []
-    def __str__(self) -> str:
-        member_count = len(self.members)
-        # Singular only for exactly one member (the condition used to be inverted).
-        return f"Room {self.room_id} ({member_count} member{'' if member_count == 1 else 's'})"
-    def to_json(self):
-        """Return a JSON-friendly dict of the room's attributes.
-        ``members`` is replaced by each member's ``to_json()`` output and an
-        ``activeTranscoders`` count is added.
-        """
-        # vars(self) is the instance's own attribute dict, so the serialized
-        # members are written into a NEW dict (via ** unpacking) rather than
-        # over self.members.
-        return {
-            **vars(self),
-            "members": {key: value.to_json() for (key, value) in self.members.items()},
-            "activeTranscoders": self.get_active_transcoders(),
-        }
-    def get_active_connections(self):
-        """Count members whose ``connection_status`` is ``"connected"``."""
-        return sum(1 for m in self.members.values() if m.connection_status == "connected")
-    def get_active_transcoders(self):
-        """Count members that currently have a transcoder attached."""
-        return sum(1 for m in self.members.values() if m.transcoder is not None)
-    def get_room_status_dict(self):
-        """Return the active connection and transcoder counts as a dict."""
-        return {
-            "activeConnections": self.get_active_connections(),
-            "activeTranscoders": self.get_active_transcoders(),
-        }
-class Member:
-    """One participant of a room, with its connection and transcoder state."""
-    def __init__(self, client_id, session_id, name) -> None:
-        self.client_id, self.session_id, self.name = client_id, session_id, name
-        # A freshly created member starts out connected with no transcoder.
-        self.connection_status = "connected"
-        self.transcoder = None
-        self.requested_output_type = None
-        self.transcoder_dynamic_config = None
-    def __str__(self) -> str:
-        short_id = self.client_id[:4]
-        return f"{self.name} (id: {short_id}...) ({self.connection_status})"
-    def to_json(self):
-        """Return the member's attributes, with ``transcoder`` reduced to a bool."""
-        payload = dict(vars(self))
-        # Only report whether a transcoder exists, not the object itself.
-        payload["transcoder"] = self.transcoder is not None
-        return payload

backend/seamless/simuleval_agent_directory.py DELETED Viewed

@@ -1,171 +0,0 @@
-# Creates a directory in which to look up available agents
-import os
-from typing import List, Optional
-from seamless.simuleval_transcoder import SimulevalTranscoder
-import json
-import logging
-logger = logging.getLogger("gunicorn")
-# fmt: off
-M4T_P0_LANGS = [
-    "eng",
-    "arb", "ben", "cat", "ces", "cmn", "cym", "dan",
-    "deu", "est", "fin", "fra", "hin", "ind", "ita",
-    "jpn", "kor", "mlt", "nld", "pes", "pol", "por",
-    "ron", "rus", "slk", "spa", "swe", "swh", "tel",
-    "tgl", "tha", "tur", "ukr", "urd", "uzn", "vie",
-]
-# fmt: on
-class NoAvailableAgentException(Exception):
-    """Raised by SimulevalAgentDirectory.get_agent_or_throw when no agent has the requested name."""
-    pass
-class AgentWithInfo:
-    """An agent bundled with the metadata advertised to clients."""
-    def __init__(
-        self,
-        agent,
-        name: str,
-        modalities: List[str],
-        target_langs: List[str],
-        # Supported dynamic params are defined in StreamingTypes.ts
-        dynamic_params: Optional[List[str]] = None,
-        description="",
-        has_expressive: Optional[bool] = None,
-    ):
-        self.agent = agent
-        self.has_expressive = has_expressive
-        self.name = name
-        self.description = description
-        self.modalities = modalities
-        self.target_langs = target_langs
-        # A fresh list per instance: a mutable [] default would be shared by
-        # every AgentWithInfo created without dynamic_params.
-        self.dynamic_params = [] if dynamic_params is None else dynamic_params
-    def get_capabilities_for_json(self):
-        """Return the agent's capabilities as a JSON-friendly dict (camelCase keys)."""
-        return {
-            "name": self.name,
-            "description": self.description,
-            "modalities": self.modalities,
-            "targetLangs": self.target_langs,
-            "dynamicParams": self.dynamic_params,
-        }
-    @classmethod
-    def load_from_json(cls, config: str):
-        """
-        Takes in JSON array of models to load in, e.g.
-        [{"name": "s2s_m4t_emma-unity2_multidomain_v0.1", "description": "M4T model that supports simultaneous S2S and S2T", "modalities": ["s2t", "s2s"], "targetLangs": ["en"]},
-        {"name": "s2s_m4t_expr-emma_v0.1", "description": "ES-EN expressive model that supports S2S and S2T", "modalities": ["s2t", "s2s"], "targetLangs": ["en"]}]
-        Builds one agent per entry and returns the list of AgentWithInfo objects.
-        """
-        agents = []
-        # Loop variable is not named "config", so the parameter is not shadowed.
-        for entry in json.loads(config):
-            agents.append(
-                cls(
-                    agent=SimulevalTranscoder.build_agent(entry["name"]),
-                    name=entry["name"],
-                    modalities=entry["modalities"],
-                    target_langs=entry["targetLangs"],
-                    # Carry over the optional description instead of dropping it.
-                    description=entry.get("description", ""),
-                )
-            )
-        return agents
-class SimulevalAgentDirectory:
-    """Registry of the agents that have been built, looked up by name."""
-    # Available models. These are the directories where the models can be found, and also serve as an ID for the model.
-    seamless_streaming_agent = "SeamlessStreaming"
-    seamless_agent = "Seamless"
-    def __init__(self):
-        self.agents = []
-        self.did_build_and_add_agents = False
-    def add_agent(self, agent: AgentWithInfo):
-        """Register an already-built agent."""
-        self.agents.append(agent)
-    def build_agent_if_available(self, model_id, config_name=None):
-        """Build the agent for ``model_id``, logging and re-raising any failure.
-        ``config_name`` is forwarded to the builder only when it is given.
-        """
-        build_kwargs = {} if config_name is None else {"config_name": config_name}
-        try:
-            return SimulevalTranscoder.build_agent(model_id, **build_kwargs)
-        except Exception as e:
-            logger.warning("Failed to build agent %s: %s", model_id, e)
-            # Import lazily and defensively: a missing fairseq2 must not mask
-            # the build error being reported.
-            try:
-                from fairseq2.assets.error import AssetError
-            except ImportError:
-                AssetError = None
-            if AssetError is not None and isinstance(e, AssetError):
-                logger.warning(
-                    "Please download gated assets and set `gated_model_dir` in the config"
-                )
-            raise
-    def build_and_add_agents(self, models_override=None):
-        """Build and register the agents once; later calls are no-ops.
-        With ``models_override`` (a JSON string) the listed models are loaded.
-        Otherwise one model is built, chosen by the ``USE_EXPRESSIVE_MODEL``
-        environment variable ("1" selects the expressive model).
-        """
-        if self.did_build_and_add_agents:
-            return
-        if models_override is not None:
-            for agent_info in AgentWithInfo.load_from_json(models_override):
-                self.add_agent(agent_info)
-        else:
-            has_expressive = os.environ.get("USE_EXPRESSIVE_MODEL", "0") == "1"
-            if has_expressive:
-                logger.info("Building expressive model...")
-                s2s_agent = self.build_agent_if_available(
-                    SimulevalAgentDirectory.seamless_agent,
-                    config_name="vad_s2st_sc_24khz_main.yaml",
-                )
-            else:
-                logger.info("Building non-expressive model...")
-                s2s_agent = self.build_agent_if_available(
-                    SimulevalAgentDirectory.seamless_streaming_agent,
-                    config_name="vad_s2st_sc_main.yaml",
-                )
-            if s2s_agent:
-                # NOTE(review): the agent is registered under the
-                # SeamlessStreaming name even when the expressive (Seamless)
-                # model was built - confirm clients rely on this single name.
-                self.add_agent(
-                    AgentWithInfo(
-                        agent=s2s_agent,
-                        name=SimulevalAgentDirectory.seamless_streaming_agent,
-                        modalities=["s2t", "s2s"],
-                        target_langs=M4T_P0_LANGS,
-                        dynamic_params=["expressive"],
-                        description="multilingual expressive model that supports S2S and S2T",
-                        has_expressive=has_expressive,
-                    )
-                )
-        if len(self.agents) == 0:
-            logger.error(
-                "No agents were loaded. This likely means you are missing the actual model files specified in simuleval_agent_directory."
-            )
-        self.did_build_and_add_agents = True
-    def get_agent(self, name):
-        """Return the first registered agent called ``name``, or ``None``."""
-        return next((agent for agent in self.agents if agent.name == name), None)
-    def get_agent_or_throw(self, name):
-        """Return the agent called ``name`` or raise NoAvailableAgentException."""
-        agent = self.get_agent(name)
-        if agent is None:
-            raise NoAvailableAgentException("No agent found with name= %s" % (name))
-        return agent
-    def get_agents_capabilities_list_for_json(self):
-        """Return the capabilities dict of every registered agent."""
-        return [agent.get_capabilities_for_json() for agent in self.agents]

backend/seamless/simuleval_transcoder.py DELETED Viewed

@@ -1,428 +0,0 @@
-from simuleval.utils.agent import build_system_from_dir
-from typing import Any, List, Optional, Tuple, Union
-import numpy as np
-import soundfile
-import io
-import asyncio
-from simuleval.agents.pipeline import TreeAgentPipeline
-from simuleval.agents.states import AgentStates
-from simuleval.data.segments import Segment, EmptySegment, SpeechSegment
-import threading
-import math
-import logging
-import sys
-from pathlib import Path
-import time
-from g2p_en import G2p
-import torch
-import traceback
-import time
-import random
-import colorlog
-from .speech_and_text_output import SpeechAndTextOutput
-# Sample rate (Hz) that incoming audio is converted to in _preprocess_wav, and the
-# divisor used to turn a speech segment's sample count into seconds in compute_length.
-MODEL_SAMPLE_RATE = 16_000
-logger = logging.getLogger(__name__)
-# logger.propagate = False
-# Module-specific colored handler writing to stdout.
-handler = colorlog.StreamHandler(stream=sys.stdout)
-formatter = colorlog.ColoredFormatter(
-    "%(log_color)s[%(asctime)s][%(levelname)s][%(module)s]:%(reset)s %(message)s",
-    reset=True,
-    log_colors={
-        "DEBUG": "cyan",
-        "INFO": "green",
-        "WARNING": "yellow",
-        "ERROR": "red",
-        "CRITICAL": "red,bg_white",
-    },
-)
-handler.setFormatter(formatter)
-logger.addHandler(handler)
-# With the level at WARNING, the logger.info calls in this module are filtered out.
-logger.setLevel(logging.WARNING)
-class OutputSegments:
-    def __init__(self, segments: Union[List[Segment], Segment]):
-        if isinstance(segments, Segment):
-            segments = [segments]
-        self.segments: List[Segment] = [s for s in segments]
-    @property
-    def is_empty(self):
-        return all(segment.is_empty for segment in self.segments)
-    @property
-    def finished(self):
-        return all(segment.finished for segment in self.segments)
-    def compute_length(self, g2p):
-        lengths = []
-        for segment in self.segments:
-            if segment.data_type == "text":
-                lengths.append(len([x for x in g2p(segment.content) if x != " "]))
-            elif segment.data_type == "speech":
-                lengths.append(len(segment.content) / MODEL_SAMPLE_RATE)
-            elif isinstance(segment, EmptySegment):
-                continue
-            else:
-                logger.warning(
-                    f"Unexpected data_type: {segment.data_type} not in 'speech', 'text'"
-                )
-        return max(lengths)
-    @classmethod
-    def join_output_buffer(
-        cls, buffer: List[List[Segment]], output: SpeechAndTextOutput
-    ):
-        num_segments = len(buffer[0])
-        for i in range(num_segments):
-            segment_list = [
-                buffer[j][i]
-                for j in range(len(buffer))
-                if buffer[j][i].data_type is not None
-            ]
-            if len(segment_list) == 0:
-                continue
-            if len(set(segment.data_type for segment in segment_list)) != 1:
-                logger.warning(
-                    f"Data type mismatch at {i}: {set(segment.data_type for segment in segment_list)}"
-                )
-                continue
-            data_type = segment_list[0].data_type
-            if data_type == "text":
-                if output.text is not None:
-                    logger.warning("Multiple text outputs, overwriting!")
-                output.text = " ".join([segment.content for segment in segment_list])
-            elif data_type == "speech":
-                if output.speech_samples is not None:
-                    logger.warning("Multiple speech outputs, overwriting!")
-                speech_out = []
-                for segment in segment_list:
-                    speech_out += segment.content
-                output.speech_samples = speech_out
-                output.speech_sample_rate = segment.sample_rate
-            elif isinstance(segment_list[0], EmptySegment):
-                continue
-            else:
-                logger.warning(
-                    f"Invalid output buffer data type: {data_type}, expected 'speech' or 'text"
-                )
-        return output
-    def __repr__(self) -> str:
-        repr_str = str(self.segments)
-        return f"{self.__class__.__name__}(\n\t{repr_str}\n)"
-class SimulevalTranscoder:
-    def __init__(self, agent, sample_rate, debug, buffer_limit):
-        self.agent = agent.agent
-        self.has_expressive = agent.has_expressive
-        self.input_queue = asyncio.Queue()
-        self.output_queue = asyncio.Queue()
-        self.states = self.agent.build_states()
-        if debug:
-            self.get_states_root().debug = True
-        self.incoming_sample_rate = sample_rate
-        self.close = False
-        self.g2p = G2p()
-        # buffer all outgoing translations within this amount of time
-        self.output_buffer_idle_ms = 5000
-        self.output_buffer_size_limit = (
-            buffer_limit  # phonemes for text, seconds for speech
-        )
-        self.output_buffer_cur_size = 0
-        self.output_buffer: List[List[Segment]] = []
-        self.speech_output_sample_rate = None
-        self.last_output_ts = time.time() * 1000
-        self.timeout_ms = (
-            30000  # close the transcoder thread after this amount of silence
-        )
-        self.first_input_ts = None
-        self.first_output_ts = None
-        self.debug = debug
-        self.debug_ts = f"{time.time()}_{random.randint(1000, 9999)}"
-        if self.debug:
-            debug_folder = Path(__file__).resolve().parent.parent / "debug"
-            self.test_incoming_wav = soundfile.SoundFile(
-                debug_folder / f"{self.debug_ts}_test_incoming.wav",
-                mode="w+",
-                format="WAV",
-                subtype="PCM_16",
-                samplerate=self.incoming_sample_rate,
-                channels=1,
-            )
-            self.get_states_root().test_input_segments_wav = soundfile.SoundFile(
-                debug_folder / f"{self.debug_ts}_test_input_segments.wav",
-                mode="w+",
-                format="WAV",
-                samplerate=MODEL_SAMPLE_RATE,
-                channels=1,
-            )
-    def get_states_root(self) -> AgentStates:
-        if isinstance(self.agent, TreeAgentPipeline):
-            # self.states is a dict
-            return self.states[self.agent.source_module]
-        else:
-            # self.states is a list
-            return self.states[0]
-    def reset_states(self):
-        if isinstance(self.agent, TreeAgentPipeline):
-            states_iter = self.states.values()
-        else:
-            states_iter = self.states
-        for state in states_iter:
-            state.reset()
-    def debug_log(self, *args):
-        if self.debug:
-            logger.info(*args)
-    @classmethod
-    def build_agent(cls, model_path, config_name):
-        logger.info(f"Building simuleval agent: {model_path}, {config_name}")
-        agent = build_system_from_dir(
-            Path(__file__).resolve().parent.parent / f"models/{model_path}",
-            config_name=config_name,
-        )
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        agent.to(device, fp16=True)
-        logger.info(
-            f"Successfully built simuleval agent {model_path} on device {device}"
-        )
-        return agent
-    def process_incoming_bytes(self, incoming_bytes, dynamic_config):
-        # TODO: We probably want to do some validation on dynamic_config to ensure it has what we needs
-        segment, sr = self._preprocess_wav(incoming_bytes)
-        segment = SpeechSegment(
-            content=segment,
-            sample_rate=sr,
-            tgt_lang=dynamic_config.get("targetLanguage"),
-            config=dynamic_config,
-        )
-        if dynamic_config.get("expressive") is True and self.has_expressive is False:
-            logger.warning(
-                "Passing 'expressive' but the agent does not support expressive output!"
-            )
-        # # segment is array([0, 0, 0, ..., 0, 0, 0], dtype=int16)
-        self.input_queue.put_nowait(segment)
-    def get_input_segment(self):
-        if self.input_queue.empty():
-            return None
-        chunk = self.input_queue.get_nowait()
-        self.input_queue.task_done()
-        return chunk
-    def convert_waveform(
-        self,
-        waveform: Union[np.ndarray, torch.Tensor],
-        sample_rate: int,
-        normalize_volume: bool = False,
-        to_mono: bool = False,
-        to_sample_rate: Optional[int] = None,
-    ) -> Tuple[Union[np.ndarray, torch.Tensor], int]:
-        """convert a waveform:
-        - to a target sample rate
-        - from multi-channel to mono channel
-        - volume normalization
-        Args:
-            waveform (numpy.ndarray or torch.Tensor): 2D original waveform
-                (channels x length)
-            sample_rate (int): original sample rate
-            normalize_volume (bool): perform volume normalization
-            to_mono (bool): convert to mono channel if having multiple channels
-            to_sample_rate (Optional[int]): target sample rate
-        Returns:
-            waveform (numpy.ndarray): converted 2D waveform (channels x length)
-            sample_rate (float): target sample rate
-        """
-        try:
-            import torchaudio.sox_effects as ta_sox
-        except ImportError:
-            raise ImportError("Please install torchaudio: pip install torchaudio")
-        effects = []
-        if normalize_volume:
-            effects.append(["gain", "-n"])
-        if to_sample_rate is not None and to_sample_rate != sample_rate:
-            effects.append(["rate", f"{to_sample_rate}"])
-        if to_mono and waveform.shape[0] > 1:
-            effects.append(["channels", "1"])
-        if len(effects) > 0:
-            is_np_input = isinstance(waveform, np.ndarray)
-            _waveform = torch.from_numpy(waveform) if is_np_input else waveform
-            converted, converted_sample_rate = ta_sox.apply_effects_tensor(
-                _waveform, sample_rate, effects
-            )
-            if is_np_input:
-                converted = converted.numpy()
-            return converted, converted_sample_rate
-        return waveform, sample_rate
-    def _preprocess_wav(self, data: Any) -> Tuple[np.ndarray, int]:
-        segment, sample_rate = soundfile.read(
-            io.BytesIO(data),
-            dtype="float32",
-            always_2d=True,
-            frames=-1,
-            start=0,
-            format="RAW",
-            subtype="PCM_16",
-            samplerate=self.incoming_sample_rate,
-            channels=1,
-        )
-        if self.debug:
-            self.test_incoming_wav.seek(0, soundfile.SEEK_END)
-            self.test_incoming_wav.write(segment)
-        segment = segment.T
-        segment, new_sample_rate = self.convert_waveform(
-            segment,
-            sample_rate,
-            normalize_volume=False,
-            to_mono=True,
-            to_sample_rate=MODEL_SAMPLE_RATE,
-        )
-        assert MODEL_SAMPLE_RATE == new_sample_rate
-        segment = segment.squeeze(axis=0)
-        return segment, new_sample_rate
-    def process_pipeline_impl(self, input_segment):
-        try:
-            with torch.no_grad():
-                output_segment = OutputSegments(
-                    self.agent.pushpop(input_segment, self.states)
-                )
-            if (
-                self.get_states_root().first_input_ts is not None
-                and self.first_input_ts is None
-            ):
-                # TODO: this is hacky
-                self.first_input_ts = self.get_states_root().first_input_ts
-            if not output_segment.is_empty:
-                self.output_queue.put_nowait(output_segment)
-            if output_segment.finished:
-                self.debug_log("OUTPUT SEGMENT IS FINISHED. Resetting states.")
-                self.reset_states()
-                if self.debug:
-                    # when we rebuild states, this value is reset to whatever
-                    # is in the system dir config, which defaults debug=False.
-                    self.get_states_root().debug = True
-        except Exception as e:
-            logger.error(f"Got exception while processing pipeline: {e}")
-            traceback.print_exc()
-        return input_segment
-    def process_pipeline_loop(self):
-        if self.close:
-            return  # closes the thread
-        self.debug_log("processing_pipeline")
-        while not self.close:
-            input_segment = self.get_input_segment()
-            if input_segment is None:
-                if self.get_states_root().is_fresh_state:  # TODO: this is hacky
-                    time.sleep(0.3)
-                else:
-                    time.sleep(0.03)
-                continue
-            self.process_pipeline_impl(input_segment)
-        self.debug_log("finished processing_pipeline")
-    def process_pipeline_once(self):
-        if self.close:
-            return
-        self.debug_log("processing pipeline once")
-        input_segment = self.get_input_segment()
-        if input_segment is None:
-            return
-        self.process_pipeline_impl(input_segment)
-        self.debug_log("finished processing_pipeline_once")
-    def get_output_segment(self):
-        if self.output_queue.empty():
-            return None
-        output_chunk = self.output_queue.get_nowait()
-        self.output_queue.task_done()
-        return output_chunk
-    def start(self):
-        self.debug_log("starting transcoder in a thread")
-        threading.Thread(target=self.process_pipeline_loop).start()
-    def first_translation_time(self):
-        return round((self.first_output_ts - self.first_input_ts) / 1000, 2)
-    def get_buffered_output(self) -> SpeechAndTextOutput:
-        now = time.time() * 1000
-        self.debug_log(f"get_buffered_output queue size: {self.output_queue.qsize()}")
-        while not self.output_queue.empty():
-            tmp_out = self.get_output_segment()
-            if tmp_out and tmp_out.compute_length(self.g2p) > 0:
-                if len(self.output_buffer) == 0:
-                    self.last_output_ts = now
-                self._populate_output_buffer(tmp_out)
-                self._increment_output_buffer_size(tmp_out)
-                if tmp_out.finished:
-                    self.debug_log("tmp_out.finished")
-                    res = self._gather_output_buffer_data(final=True)
-                    self.debug_log(f"gathered output data: {res}")
-                    self.output_buffer = []
-                    self.increment_output_buffer_size = 0
-                    self.last_output_ts = now
-                    self.first_output_ts = now
-                    return res
-            else:
-                self.debug_log("tmp_out.compute_length is not > 0")
-        if len(self.output_buffer) > 0 and (
-            now - self.last_output_ts >= self.output_buffer_idle_ms
-            or self.output_buffer_cur_size >= self.output_buffer_size_limit
-        ):
-            self.debug_log(
-                "[get_buffered_output] output_buffer is not empty. getting res to return."
-            )
-            self.last_output_ts = now
-            res = self._gather_output_buffer_data(final=False)
-            self.debug_log(f"gathered output data: {res}")
-            self.output_buffer = []
-            self.output_buffer_phoneme_count = 0
-            self.first_output_ts = now
-            return res
-        else:
-            self.debug_log("[get_buffered_output] output_buffer is empty...")
-            return None
-    def _gather_output_buffer_data(self, final):
-        output = SpeechAndTextOutput()
-        output.final = final
-        output = OutputSegments.join_output_buffer(self.output_buffer, output)
-        return output
-    def _increment_output_buffer_size(self, segment: OutputSegments):
-        self.output_buffer_cur_size += segment.compute_length(self.g2p)
-    def _populate_output_buffer(self, segment: OutputSegments):
-        self.output_buffer.append(segment.segments)
-    def _compute_phoneme_count(self, string: str) -> int:
-        return len([x for x in self.g2p(string) if x != " "])

backend/seamless/speech_and_text_output.py DELETED Viewed

@@ -1,15 +0,0 @@
-# Provides a container to return both speech and text output from our model at the same time
-class SpeechAndTextOutput:
-    def __init__(
-        self,
-        text: str = None,
-        speech_samples: list = None,
-        speech_sample_rate: float = None,
-        final: bool = False,
-    ):
-        self.text = text
-        self.speech_samples = speech_samples
-        self.speech_sample_rate = speech_sample_rate
-        self.final = final

backend/seamless/transcoder_helpers.py DELETED Viewed

@@ -1,43 +0,0 @@
-import logging
-logger = logging.getLogger("gunicorn")
-def get_transcoder_output_events(transcoder) -> list:
-    speech_and_text_output = transcoder.get_buffered_output()
-    if speech_and_text_output is None:
-        logger.debug("No output from transcoder.get_buffered_output()")
-        return []
-    logger.debug(f"We DID get output from the transcoder! {speech_and_text_output}")
-    lat = None
-    events = []
-    if speech_and_text_output.speech_samples:
-        events.append(
-            {
-                "event": "translation_speech",
-                "payload": speech_and_text_output.speech_samples,
-                "sample_rate": speech_and_text_output.speech_sample_rate,
-            }
-        )
-    if speech_and_text_output.text:
-        events.append(
-            {
-                "event": "translation_text",
-                "payload": speech_and_text_output.text,
-            }
-        )
-    for e in events:
-        e["eos"] = speech_and_text_output.final
-    # if not latency_sent:
-    #     lat = transcoder.first_translation_time()
-    #     latency_sent = True
-    #     to_send["latency"] = lat
-    return events

backend/seamless_utils.py DELETED Viewed

@@ -1,210 +0,0 @@
-# base seamless imports
-# ---------------------------------
-import io
-import json
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-import mmap
-import numpy as np
-import soundfile
-import torchaudio
-import torch
-from pydub import AudioSegment
-# ---------------------------------
-# seamless-streaming specific imports
-# ---------------------------------
-import math
-from simuleval.data.segments import SpeechSegment, EmptySegment
-from seamless_communication.streaming.agents.seamless_streaming_s2st import (
-    SeamlessStreamingS2STVADAgent,
-)
-from simuleval.utils.arguments import cli_argument_list
-from simuleval import options
-from typing import Union, List
-from simuleval.data.segments import Segment, TextSegment
-from simuleval.agents.pipeline import TreeAgentPipeline
-from simuleval.agents.states import AgentStates
-# ---------------------------------
-# seamless setup
-# source: https://colab.research.google.com/github/kauterry/seamless_communication/blob/main/Seamless_Tutorial.ipynb?
-SAMPLE_RATE = 16000
-# PM - This class is used to simulate the audio frontend in the seamless streaming pipeline.
-# It needs to be replaced with the actual audio frontend.
-# TODO: replacement class that takes in PCM-16 bytes and returns SpeechSegment
-class AudioFrontEnd:
-    def __init__(self, wav_file, segment_size) -> None:
-        self.samples, self.sample_rate = soundfile.read(wav_file)
-        print(self.sample_rate, "sample rate")
-        assert self.sample_rate == SAMPLE_RATE
-        # print(len(self.samples), self.samples[:100])
-        self.samples = self.samples  # .tolist()
-        self.segment_size = segment_size
-        self.step = 0
-    def send_segment(self):
-        """
-        This is the front-end logic in simuleval instance.py
-        """
-        num_samples = math.ceil(self.segment_size / 1000 * self.sample_rate)
-        if self.step < len(self.samples):
-            if self.step + num_samples >= len(self.samples):
-                samples = self.samples[self.step :]
-                is_finished = True
-            else:
-                samples = self.samples[self.step : self.step + num_samples]
-                is_finished = False
-                self.samples = self.samples[self.step:]
-            self.step = min(self.step + num_samples, len(self.samples))
-            segment = SpeechSegment(
-                content=samples,
-                sample_rate=self.sample_rate,
-                finished=is_finished,
-            )
-        else:
-            # Finish reading this audio
-            segment = EmptySegment(
-                finished=True,
-            )
-            self.step = 0
-            self.samples = []
-        return segment
-        # samples = self.samples[:num_samples]
-        # self.samples = self.samples[num_samples:]
-        # segment = SpeechSegment(
-        #     content=samples,
-        #     sample_rate=self.sample_rate,
-        #     finished=False,
-        # )
-    def add_segments(self, wav):
-        new_samples, _ = soundfile.read(wav)
-        self.samples = np.concatenate((self.samples, new_samples))
-class OutputSegments:
-    def __init__(self, segments: Union[List[Segment], Segment]):
-        if isinstance(segments, Segment):
-            segments = [segments]
-        self.segments: List[Segment] = [s for s in segments]
-    @property
-    def is_empty(self):
-        return all(segment.is_empty for segment in self.segments)
-    @property
-    def finished(self):
-        return all(segment.finished for segment in self.segments)
-def get_audiosegment(samples, sr):
-    b = io.BytesIO()
-    soundfile.write(b, samples, samplerate=sr, format="wav")
-    b.seek(0)
-    return AudioSegment.from_file(b)
-def reset_states(system, states):
-    if isinstance(system, TreeAgentPipeline):
-        states_iter = states.values()
-    else:
-        states_iter = states
-    for state in states_iter:
-        state.reset()
-def get_states_root(system, states) -> AgentStates:
-    if isinstance(system, TreeAgentPipeline):
-        # self.states is a dict
-        return states[system.source_module]
-    else:
-        # self.states is a list
-        return system.states[0]
-def build_streaming_system(model_configs, agent_class):
-    parser = options.general_parser()
-    parser.add_argument("-f", "--f", help="a dummy argument to fool ipython", default="1")
-    agent_class.add_args(parser)
-    args, _ = parser.parse_known_args(cli_argument_list(model_configs))
-    system = agent_class.from_args(args)
-    return system
-def run_streaming_inference(system, audio_frontend, system_states, tgt_lang):
-    # NOTE: Here for visualization, we calculate delays offset from audio
-    # *BEFORE* VAD segmentation.
-    # In contrast for SimulEval evaluation, we assume audios are pre-segmented,
-    # and Average Lagging, End Offset metrics are based on those pre-segmented audios.
-    # Thus, delays here are *NOT* comparable to SimulEval per-segment delays
-    delays = {"s2st": [], "s2tt": []}
-    prediction_lists = {"s2st": [], "s2tt": []}
-    speech_durations = []
-    curr_delay = 0
-    target_sample_rate = None
-    while True:
-        input_segment = audio_frontend.send_segment()
-        input_segment.tgt_lang = tgt_lang
-        curr_delay += len(input_segment.content) / SAMPLE_RATE * 1000
-        if input_segment.finished:
-            # a hack, we expect a real stream to end with silence
-            get_states_root(system, system_states).source_finished = True
-        # Translation happens here
-        if isinstance(input_segment, EmptySegment):
-            return None, None, None, None
-        output_segments = OutputSegments(system.pushpop(input_segment, system_states))
-        if not output_segments.is_empty:
-            for segment in output_segments.segments:
-                # NOTE: another difference from SimulEval evaluation -
-                # delays are accumulated per-token
-                if isinstance(segment, SpeechSegment):
-                    pred_duration = 1000 * len(segment.content) / segment.sample_rate
-                    speech_durations.append(pred_duration)
-                    delays["s2st"].append(curr_delay)
-                    prediction_lists["s2st"].append(segment.content)
-                    target_sample_rate = segment.sample_rate
-                elif isinstance(segment, TextSegment):
-                    delays["s2tt"].append(curr_delay)
-                    prediction_lists["s2tt"].append(segment.content)
-                    print(curr_delay, segment.content)
-        if output_segments.finished:
-            reset_states(system, system_states)
-        if input_segment.finished:
-            # an assumption of SimulEval agents -
-            # once source_finished=True, generate until output translation is finished
-            break
-    return delays, prediction_lists, speech_durations, target_sample_rate
-def get_s2st_delayed_targets(delays, target_sample_rate, prediction_lists, speech_durations):
-    # get calculate intervals + durations for s2st
-    intervals = []
-    start = prev_end = prediction_offset = delays["s2st"][0]
-    target_samples = [0.0] * int(target_sample_rate * prediction_offset / 1000)
-    for i, delay in enumerate(delays["s2st"]):
-        start = max(prev_end, delay)
-        if start > prev_end:
-            # Wait source speech, add discontinuity with silence
-            target_samples += [0.0] * int(
-                target_sample_rate * (start - prev_end) / 1000
-            )
-        target_samples += prediction_lists["s2st"][i]
-        duration = speech_durations[i]
-        prev_end = start + duration
-        intervals.append([start, duration])
-    return target_samples, intervals

backend/tests/__pycache__/test_client.cpython-310-pytest-8.1.1.pyc DELETED Viewed

Binary file (6.82 kB)

backend/tests/__pycache__/test_main.cpython-310-pytest-8.1.1.pyc DELETED Viewed

Binary file (3.38 kB)

backend/tests/__pycache__/test_main.cpython-310.pyc DELETED Viewed

Binary file (2.2 kB)

backend/tests/silence.wav DELETED Viewed

Binary file (302 kB)

backend/tests/speaking.wav DELETED Viewed

Binary file (255 kB)

backend/tests/test_client.py DELETED Viewed

@@ -1,59 +0,0 @@
-import os
-import wave
-import pytest
-import torchaudio
-import os
-import sys
-# Put the parent of this tests directory on sys.path so the top-level
-# ``Client`` module imported below can be found.
-current_dir = os.path.dirname(os.path.abspath(__file__))
-parent_dir = os.path.dirname(current_dir)
-sys.path.append(parent_dir)
-from Client import Client
-@pytest.fixture
-def mock_client():
-    client = Client("test_sid", "test_client_id", original_sr=44100)
-    return client
-def test_client_init(mock_client):
-    assert mock_client.sid == "test_sid"
-    assert mock_client.client_id == "test_client_id"
-    assert mock_client.call_id == None
-    assert mock_client.buffer == bytearray()
-    assert mock_client.output_path == "test_sid_output_audio.wav"
-    assert mock_client.target_language == None
-    assert mock_client.original_sr == 44100
-    assert mock_client.vad.sample_rate == 16000
-    assert mock_client.vad.frame_length == 25
-    assert mock_client.vad.frame_shift == 20
-    assert mock_client.vad.energy_threshold == 0.05
-    assert mock_client.vad.pre_emphasis == 0.95
-def test_client_add_bytes(mock_client):
-    mock_client.add_bytes(b"test")
-    assert mock_client.buffer == b"test"
-def test_client_resample_and_clear(mock_client):
-    location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
-    speaking_bytes = wave.open(location + "/speaking.wav", "rb").readframes(-1)
-    mock_client.add_bytes(speaking_bytes)
-    resampled_waveform = mock_client.resample_and_clear()
-    torchaudio.save(location + "testoutput.wav", resampled_waveform, 16000)
-    with wave.open(location + "testoutput.wav", "rb") as wf:
-        sample_rate = wf.getframerate()
-    assert mock_client.buffer == bytearray()
-    assert sample_rate == 16000
-def test_client_vad(mock_client):
-    location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
-    speaking_bytes = wave.open(location + "/speaking.wav", "rb").readframes(-1)
-    mock_client.add_bytes(speaking_bytes)
-    resampled_waveform = mock_client.resample_and_clear()
-    assert mock_client.buffer == bytearray()
-    assert mock_client.vad_analyse(resampled_waveform) == True
-    silent_bytes = wave.open(location + "/silence.wav", "rb").readframes(-1)
-    mock_client.add_bytes(silent_bytes)
-    resampled_waveform = mock_client.resample_and_clear()
-    assert mock_client.buffer == bytearray()
-    assert mock_client.vad_analyse(resampled_waveform) == False

backend/tests/test_main.py DELETED Viewed

@@ -1,90 +0,0 @@
-from fastapi import FastAPI
-import pytest
-from unittest.mock import AsyncMock, MagicMock, ANY
-import socketio
-import os
-import sys
-# Put the parent of this tests directory on sys.path so the top-level
-# ``Client`` and ``main`` modules imported below can be found.
-current_dir = os.path.dirname(os.path.abspath(__file__))
-parent_dir = os.path.dirname(current_dir)
-sys.path.append(parent_dir)
-from Client import Client
-from main import sio, connect, disconnect, target_language, call_user, answer_call, incoming_audio, clients, rooms
-from unittest.mock import patch
-# NOTE(review): this rebinds ``sio`` and so shadows the ``sio`` imported from
-# ``main`` above; the handlers imported from ``main`` presumably keep using
-# main's own server object. Confirm whether this local server is needed.
-sio = socketio.AsyncServer(
-    async_mode="asgi",
-    cors_allowed_origins="*",
-    # engineio_logger=logger,
-)
-# sio.logger.setLevel(logging.DEBUG)
-socketio_app = socketio.ASGIApp(sio)
-# ``app`` is not referenced by any test visible in this module.
-app = FastAPI()
-app.mount("/", socketio_app)
-@pytest.fixture(autouse=True)
-def setup_clients_and_rooms():
-    global clients, rooms
-    clients.clear()
-    rooms.clear()
-    yield
-@pytest.fixture
-def mock_client():
-    client = Client("test_sid", "test_client_id", original_sr=44100)
-    return client
-@pytest.mark.asyncio
-async def test_connect(mock_client):
-    sid = mock_client.sid
-    environ = {'QUERY_STRING': 'client_id=test_client_id'}
-    await connect(sid, environ)
-    assert sid in clients
-@pytest.mark.asyncio
-async def test_disconnect(mock_client):
-    sid = mock_client.sid
-    clients[sid] = mock_client
-    await disconnect(sid)
-    assert sid not in clients
-@pytest.mark.asyncio
-async def test_target_language(mock_client):
-    sid = mock_client.sid
-    clients[sid] = mock_client
-    target_lang = "fr"
-    await target_language(sid, target_lang)
-    assert clients[sid].target_language == "fr"
-# PM - issues with socketio enter_room in these tests
-# @pytest.mark.asyncio
-# async def test_call_user(mock_client):
-#     sid = mock_client.sid
-#     clients[sid] = mock_client
-#     call_id = "1234"
-#     await call_user(sid, call_id)
-#     assert call_id in rooms
-#     assert sid in rooms[call_id]
-# @pytest.mark.asyncio
-# async def test_answer_call(mock_client):
-#     sid = mock_client.sid
-#     clients[sid] = mock_client
-#     call_id = "1234"
-#     await answer_call(sid, call_id)
-#     assert call_id in rooms
-#     assert sid in rooms[call_id]
-@pytest.mark.asyncio
-async def test_incoming_audio(mock_client):
-    sid = mock_client.sid
-    clients[sid] = mock_client
-    data = b"\x01"
-    call_id = "1234"
-    await incoming_audio(sid, data, call_id)
-    assert clients[sid].get_length() != 0

backend/utils/__pycache__/text_rank.cpython-310.pyc DELETED Viewed

Binary file (2.03 kB)

backend/utils/text_rank.py DELETED Viewed

@@ -1,60 +0,0 @@
-import spacy
-import pytextrank
-from spacy.tokens import Span
-# Define decorator for converting to singular version of words
-@spacy.registry.misc("plural_scrubber")
-def plural_scrubber():
-    def scrubber_func(span: Span) -> str:
-        return span.lemma_
-    return scrubber_func
-def model_selector(target_language: str):
-    # Load subset of non-english models
-    language_model = {
-        "spa": "es_core_news_sm",
-        "fra": "fr_core_news_sm",
-        "pol": "pl_core_news_sm",
-        "deu": "de_core_news_sm",
-        "ita": "it_core_news_sm",
-        "por": "pt_core_news_sm",
-        "nld": "nl_core_news_sm",
-        "fin": "fi_core_news_sm",
-        "ron": "ro_core_news_sm",
-        "rus": "ru_core_news_sm"
-    }
-    try:
-        nlp = spacy.load(language_model[target_language])
-    except KeyError:
-        # Load a spaCy English model
-        nlp = spacy.load("en_core_web_lg")
-    # Add TextRank component to pipeline with stopwords
-    nlp.add_pipe("textrank", config={
-                            "stopwords": {token:["NOUN"] for token in nlp.Defaults.stop_words},
-                            "scrubber": {"@misc": "plural_scrubber"}})
-    return nlp
-def extract_terms(text, target_language, length):
-    nlp = model_selector(target_language)
-    # Perform fact extraction on overall summary and segment summaries
-    doc = nlp(text)
-    if length < 100:
-        # Get single most used key term
-        phrases = {phrase.text for phrase in doc._.phrases[:1]}
-    elif length > 100 and length < 300:
-        # Create unique set from top 2 ranked phrases
-        phrases = {phrase.text for phrase in doc._.phrases[:2]}
-    if length > 300:
-        # Create unique set from top 3 ranked phrases
-        phrases = {phrase.text for phrase in doc._.phrases[:3]}
-    return list(phrases)