Spaces:

benjolo
/

InterpreTalk

Paused

+from typing import Tuple
+import wave
+import os
+import torchaudio
+from vad import EnergyVAD
from typing import Iterator

# Sampling rate (Hz) that all audio is resampled to before analysis.
TARGET_SAMPLING_RATE = 16000


def create_frames(data: bytes, frame_duration: int) -> Tuple[Iterator[bytes], int]:
    """Split ``data`` into consecutive fixed-size chunks.

    Args:
        data: Raw audio bytes to split.
        frame_duration: Frame length in milliseconds.

    Returns:
        A ``(frames, frame_size)`` pair: ``frames`` lazily yields slices of
        ``data`` of at most ``frame_size`` items (the last one may be shorter).

    Raises:
        ValueError: If ``frame_duration`` yields a frame size below one.
    """
    # NOTE(review): frame_size is a sample count at TARGET_SAMPLING_RATE but is
    # used as a byte step over ``data``; confirm this matches the sample width.
    frame_size = int(TARGET_SAMPLING_RATE * (frame_duration / 1000))
    if frame_size <= 0:
        # range() would otherwise fail with an unrelated "arg 3 must not be zero".
        raise ValueError(f"frame_duration must yield a positive frame size, got {frame_duration!r}")
    frames = (data[start:start + frame_size] for start in range(0, len(data), frame_size))
    return frames, frame_size
def detect_activity(energies: list):
    """Decide whether a sequence of per-frame VAD flags contains speech.

    Returns False outright when the sum of the flags is below one twelfth of
    the number of frames; otherwise returns True as soon as twelve consecutive
    flags equal to 1 are seen, and False if no such run exists.
    """
    frame_count = len(energies)
    if sum(energies) < frame_count / 12:
        return False

    streak = 0
    for flag in energies:
        # Any frame that is not active resets the run of active frames.
        streak = streak + 1 if flag == 1 else 0
        if streak == 12:
            return True
    return False
class Client:
    """State kept for one connected socket: identity, call membership and audio.

    Incoming audio bytes are accumulated with ``add_bytes``. ``resample_and_clear``
    writes them out as mono 16-bit PCM at ``original_sr``, resamples the result
    to ``TARGET_SAMPLING_RATE`` and empties the buffer; ``vad_analyse`` runs the
    energy-based VAD on the resampled audio.
    """

    def __init__(self, sid, client_id, username, call_id=None, original_sr=None):
        self.sid = sid
        self.client_id = client_id
        # Fixed: a stray trailing comma used to store this as a 1-tuple.
        self.username = username
        self.call_id = call_id
        self.buffer = bytearray()
        self.output_path = self.sid + "_output_audio.wav"
        self.target_language = None
        self.original_sr = original_sr
        self.vad = EnergyVAD(
            sample_rate=TARGET_SAMPLING_RATE,
            frame_length=25,
            frame_shift=20,
            energy_threshold=0.05,
            pre_emphasis=0.95,
        )  # PM - Default values given in the docs for this class

    def add_bytes(self, new_bytes):
        """Append ``new_bytes`` to the pending audio buffer."""
        self.buffer += new_bytes

    def resample_and_clear(self):
        """Write the buffer to ``<sid>_OG.wav``, resample it and empty the buffer.

        Returns:
            The waveform resampled to ``TARGET_SAMPLING_RATE``.

        Raises:
            ValueError: If ``original_sr`` has not been set yet.
        """
        if self.original_sr is None:
            # Without the source rate the WAV header cannot be written.
            raise ValueError(f"original sample rate not set for client {self.sid}")
        source_path = self.sid + "_OG.wav"
        print(f"📥 [ClientAudioBuffer] Writing {len(self.buffer)} bytes to {self.output_path}")
        with wave.open(source_path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(self.original_sr)
            wf.setnframes(0)
            wf.setcomptype("NONE", "not compressed")
            wf.writeframes(self.buffer)
        waveform, sample_rate = torchaudio.load(source_path)
        resampler = torchaudio.transforms.Resample(sample_rate, TARGET_SAMPLING_RATE, dtype=waveform.dtype)
        resampled_waveform = resampler(waveform)
        self.buffer = bytearray()
        return resampled_waveform

    def vad_analyse(self, resampled_waveform):
        """Save the waveform to ``output_path`` and return whether speech is detected."""
        torchaudio.save(self.output_path, resampled_waveform, TARGET_SAMPLING_RATE)
        vad_array = self.vad(resampled_waveform)
        # print(f"VAD OUTPUT: {vad_array}")
        return detect_activity(vad_array)

    def write_to_file(self, resampled_waveform):
        """Save the waveform to ``output_path`` at ``TARGET_SAMPLING_RATE``."""
        torchaudio.save(self.output_path, resampled_waveform, TARGET_SAMPLING_RATE)

    def get_length(self):
        """Return the number of buffered bytes."""
        return len(self.buffer)

    def __del__(self):
        # Warn about unprocessed audio and remove the temporary WAV files.
        if len(self.buffer) > 0:
            print(f"🚨 [ClientAudioBuffer] Buffer not empty for {self.sid} ({len(self.buffer)} bytes)!")
        if os.path.exists(self.output_path):
            os.remove(self.output_path)
        if os.path.exists(self.sid + "_OG.wav"):
            os.remove(self.sid + "_OG.wav")

backend/__pycache__/Client.cpython-310.pyc ADDED Viewed

Binary file (3.37 kB). View file

backend/__pycache__/main.cpython-310.pyc ADDED Viewed

Binary file (8.57 kB). View file

backend/logging.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

# Logging configuration in the logging.config.dictConfig schema.
version: 1
disable_existing_loggers: false
formatters:
  standard:
    format: "%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s"
handlers:
  console:
    class: logging.StreamHandler
    formatter: standard
    stream: ext://sys.stdout
loggers:
  # Logger names are flat dotted keys; an `error` key nested under `uvicorn`
  # is not interpreted as the "uvicorn.error" logger.
  uvicorn.error:
    propagate: true
root:
  level: INFO
  handlers: [console]
  propagate: no

backend/main.py ADDED Viewed

	@@ -0,0 +1,344 @@

+from operator import itemgetter
+import os
+from datetime import datetime
+import uvicorn
+from typing import Any, Optional, Tuple, Dict, TypedDict
+from urllib import parse
+from uuid import uuid4
+import logging
+from fastapi.logger import logger as fastapi_logger
+import sys
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi import APIRouter, Body, Request, status
+from pymongo import MongoClient
+from dotenv import dotenv_values
+from routes import router as api_router
+from contextlib import asynccontextmanager
+import requests
+from typing import List
+from datetime import date
+from mongodb.operations.calls import *
+from mongodb.operations.users import *
+from mongodb.models.calls import UserCall, UpdateCall
+# from mongodb.endpoints.calls import *
+from transformers import AutoProcessor, SeamlessM4Tv2Model
+# from seamless_communication.inference import Translator
+from Client import Client
+import numpy as np
+import torch
+import socketio
# Configure logger: the gunicorn and uvicorn access loggers propagate to their
# parents, and uvicorn access / FastAPI share the gunicorn error handlers.
gunicorn_error_logger, gunicorn_logger, uvicorn_access_logger = (
    logging.getLogger("gunicorn.error"),
    logging.getLogger("gunicorn"),
    logging.getLogger("uvicorn.access"),
)
gunicorn_error_logger.propagate = gunicorn_logger.propagate = uvicorn_access_logger.propagate = True
uvicorn_access_logger.handlers = fastapi_logger.handlers = gunicorn_error_logger.handlers

# sio is the main socket.io entrypoint
sio = socketio.AsyncServer(async_mode="asgi", cors_allowed_origins="*", logger=gunicorn_logger, engineio_logger=gunicorn_logger)
# sio.logger.setLevel(logging.DEBUG)
socketio_app = socketio.ASGIApp(sio)
# app.mount("/", socketio_app)

# The MongoDB connection string is read from the environment; the .env based
# alternative is kept below for reference.
# config = dotenv_values(".env")
# uri = config['MONGODB_URI']
uri = os.environ["MONGODB_URI"]
# MongoDB Connection Lifespan Events
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Open the MongoDB client on startup and close it on shutdown.

    The client and the ``IT-Cluster1`` database handle are stored on ``app`` as
    ``mongodb_client`` and ``database``.
    """
    # startup logic
    app.mongodb_client = MongoClient(uri)
    app.database = app.mongodb_client['IT-Cluster1']  # connect to interpretalk primary db
    try:
        app.mongodb_client.admin.command('ping')
        print("MongoDB Connection Established...")
    except Exception as e:
        # Best effort: a failed ping is reported but does not stop startup.
        print(e)
    try:
        yield
    finally:
        # shutdown logic - runs even when the application exits with an error
        print("Closing MongoDB Connection...")
        app.mongodb_client.close()
app = FastAPI(lifespan=lifespan, logger=gunicorn_logger)

# New CORS functionality.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive; confirm this is intended outside development.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],  # configured node app port
)

# include routers for user, calls and transcripts operations
app.include_router(api_router)

DEBUG = True
ESCAPE_HATCH_SERVER_LOCK_RELEASE_NAME = "remove_server_lock"
TARGET_SAMPLING_RATE = 16_000
MAX_BYTES_BUFFER = 960_000

# Two blank lines followed by the startup banner.
print("\n")
print(f"{'=' * 18} Interpretalk is starting... {'=' * 18}")
###############################################
# Configure socketio server
###############################################

# TODO PM - change this to the actual path
# seamless remnant code
CLIENT_BUILD_PATH = "../streaming-react-app/dist/"
static_files = {
    "/": CLIENT_BUILD_PATH,
    "/assets/seamless-db6a2555.svg": {
        "filename": f"{CLIENT_BUILD_PATH}assets/seamless-db6a2555.svg",
        "content_type": "image/svg+xml",
    },
}

# PM - hardcoding temporarily as my GPU doesnt have enough vram
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

# Processor and model are loaded once at import time and shared by all handlers.
processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
model = model.to(device)

bytes_data = bytearray()
model_name = "seamlessM4T_v2_large"
if model_name == "seamlessM4T_v2_large":
    vocoder_name = "vocoder_v2"
else:
    vocoder_name = "vocoder_36langs"

# Registries keyed by socket id (clients) and by call id (rooms).
clients, rooms = {}, {}
def get_collection_users():
    """Return the ``user_records`` collection of the application database."""
    users_collection = app.database["user_records"]
    return users_collection
def get_collection_calls():
    """Return the ``call_records`` collection of the application database."""
    calls_collection = app.database["call_records"]
    return calls_collection
@app.get("/", response_description="Welcome User")
def test():
    # Root route: answers with a fixed welcome message.
    greeting = {"message": "Welcome to InterpreTalk!"}
    return greeting
async def send_translated_text(client_id, username, original_text, translated_text, room_id):
    """Emit a ``translated_text`` event with the caption payload to ``room_id``.

    Every payload value is converted to ``str``; the timestamp is the current
    local time.
    """
    fields = {
        "author_id": client_id,
        "author_username": username,
        "original_text": original_text,
        "translated_text": translated_text,
    }
    payload = {key: str(value) for key, value in fields.items()}
    payload["timestamp"] = str(datetime.now())

    gunicorn_logger.info("SENDING TRANSLATED TEXT TO CLIENT")
    await sio.emit("translated_text", payload, room=room_id)
    gunicorn_logger.info("SUCCESSFULLY SEND AUDIO TO FRONTEND")
@sio.on("connect")
async def connect(sid, environ):
    """Register a newly connected socket as a ``Client`` in ``clients``.

    ``client_id`` is read from the connection's query string and used to look
    up the username in the user collection.
    """
    print(f"📥 [event: connected] sid={sid}")
    client_id = dict(parse.parse_qsl(environ["QUERY_STRING"])).get("client_id")
    gunicorn_logger.info(f"📥 [event: connected] sid={sid}, client_id={client_id}")

    # get username to Client Object from DB
    username = find_name_from_id(get_collection_users(), client_id)

    # sid = socketid, client_id = client specific ID, always the same for same user
    new_client = Client(sid, client_id, username)
    clients[sid] = new_client
    print(new_client.username)

    gunicorn_logger.warning(f"Client connected: {sid}")
    gunicorn_logger.warning(clients)
@sio.on("disconnect")
async def disconnect(sid):
    """Forget a disconnected socket and post-process its call, if any.

    The client is removed from ``clients`` and from its room. When it had
    joined a call, key-term extraction and summarisation are run for that call;
    failures there are logged and never propagate to socket.io.
    """
    gunicorn_logger.debug(f"📤 [event: disconnected] sid={sid}")

    client = clients.pop(sid, None)
    if client is None:
        # The connect handler never registered this sid; nothing to clean up.
        return

    call_id = client.call_id
    user_id = client.client_id
    target_language = client.target_language

    # Drop the sid from its room so stale members cannot keep the room "full".
    members = rooms.get(call_id)
    if members is not None and sid in members:
        members.remove(sid)
        if not members:
            del rooms[call_id]

    if call_id is None:
        # The client never joined a call, so there is no transcript to process.
        return

    # Perform Key Term Extraction and summarisation
    try:
        # Get combined caption field for call record based on call_id
        term_extraction(get_collection_calls(), call_id, user_id, target_language)
        # Perform summarisation based on target language
        summarise(get_collection_calls(), call_id, user_id, target_language)
    except Exception:
        gunicorn_logger.exception(f"📤 [event: term_extraction/summarisation request error] sid={sid}, call={call_id}")
@sio.on("target_language")
async def target_language(sid, target_lang):
    """Store the language selected by the client identified by ``sid``."""
    gunicorn_logger.info(f"📥 [event: target_language] sid={sid}, target_lang={target_lang}")
    setattr(clients[sid], "target_language", target_lang)
async def _join_call_room(sid, call_id, event_label):
    """Add ``sid`` to room ``call_id`` unless it is already in it or the room has two members.

    Returns True when the sid was added, False otherwise.
    """
    import inspect

    gunicorn_logger.info(f"{event_label} {sid}: entering room {call_id}")
    members = rooms.setdefault(call_id, [])
    if sid in members or len(members) >= 2:
        gunicorn_logger.info(f"{event_label} {sid}: room {call_id} is full")
        # await sio.emit("room_full", room=call_id, to=sid)
        return False

    members.append(sid)
    # enter_room is a coroutine on recent python-socketio AsyncServer releases
    # and a plain method on older ones; await only when required.
    joined = sio.enter_room(sid, call_id)
    if inspect.isawaitable(joined):
        await joined
    return True


@sio.on("call_user")
async def call_user(sid, call_id):
    """Put the caller into the call's room and create the call record."""
    clients[sid].call_id = call_id
    await _join_call_room(sid, call_id, "CALL")

    # BO - Get call id from dictionary created during socketio connection
    client_id = clients[sid].client_id
    gunicorn_logger.warning(f"NOW TRYING TO CREATE DB RECORD FOR Caller with ID: {client_id} for call: {call_id}")

    # BO -> Create Call Record with Caller and call_id field (None for callee, duration, terms..)
    request_data = {
        "call_id": str(call_id),
        "caller_id": str(client_id),
        "creation_date": str(datetime.now())
    }
    response = create_calls(get_collection_calls(), request_data)
    print(response)  # BO - print created db call record


@sio.on("audio_config")
async def audio_config(sid, sample_rate):
    """Record the sample rate of the audio this client will send."""
    clients[sid].original_sr = sample_rate


@sio.on("answer_call")
async def answer_call(sid, call_id):
    """Put the callee into the call's room and store it on the call record."""
    clients[sid].call_id = call_id
    await _join_call_room(sid, call_id, "ANSWER")

    # BO - Get call id from dictionary created during socketio connection
    client_id = clients[sid].client_id

    # BO -> Update Call Record with Callee field based on call_id
    gunicorn_logger.warning(f"NOW UPDATING MongoDB RECORD FOR Caller with ID: {client_id} for call: {call_id}")
    request_data = {
        "callee_id": client_id
    }
    response = update_calls(get_collection_calls(), call_id, request_data)
    print(response)  # BO - print updated db call record
@sio.on("incoming_audio")
async def incoming_audio(sid, data, call_id):
    """Buffer incoming audio and, once the buffer is full, caption and translate it.

    When the client's buffer reaches ``MAX_BYTES_BUFFER`` bytes it is resampled
    and checked for speech. Detected speech is transcribed in the speaker's
    language, translated into the other participant's language when the two
    differ, emitted to the room and stored on the call record. Any error is
    logged with its traceback instead of propagating to socket.io.
    """
    try:
        client = clients[sid]
        client.add_bytes(data)
        if client.get_length() < MAX_BYTES_BUFFER:
            return

        gunicorn_logger.info('Buffer full, now outputting...')
        resampled_audio = client.resample_and_clear()
        if not client.vad_analyse(resampled_audio):
            return
        gunicorn_logger.info('Speech detected, now processing audio.....')

        # source lang is speakers tgt language 😃
        src_lang = client.target_language

        # The other participant decides the translation target; without one
        # there is nobody to translate for.
        tgt_sid = next((peer for peer in rooms.get(call_id, []) if peer != sid), None)
        if tgt_sid is None or tgt_sid not in clients:
            gunicorn_logger.warning(f"No other participant in room {call_id}; dropping audio from {sid}")
            return
        tgt_lang = clients[tgt_sid].target_language

        # following example from https://github.com/facebookresearch/seamless_communication/blob/main/docs/m4t/README.md#transformers-usage
        # NOTE(review): model inference below runs synchronously inside this coroutine.
        output_tokens = processor(audios=resampled_audio, src_lang=src_lang, return_tensors="pt", sampling_rate=TARGET_SAMPLING_RATE).to(device)
        model_output = model.generate(**output_tokens, tgt_lang=src_lang, generate_speech=False)[0].tolist()[0]
        asr_text = processor.decode(model_output, skip_special_tokens=True)
        print(f"ASR TEXT = {asr_text}")

        # ASR TEXT => ORIGINAL TEXT
        if src_lang != tgt_lang:
            t2t_tokens = processor(text=asr_text, src_lang=src_lang, tgt_lang=tgt_lang, return_tensors="pt").to(device)
            translated_data = model.generate(**t2t_tokens, tgt_lang=tgt_lang, generate_speech=False)[0].tolist()[0]
            translated_text = processor.decode(translated_data, skip_special_tokens=True)
            print(f"TRANSLATED TEXT = {translated_text}")
        else:
            # PM - both users have same language selected, no need to translate
            translated_text = asr_text

        await send_translated_text(client.client_id, client.username, asr_text, translated_text, call_id)

        # BO -> send translated_text to mongodb as caption record update based on call_id
        await send_captions(client.client_id, client.username, asr_text, translated_text, call_id)
    except Exception as e:
        # logger.exception attaches the traceback; the previous
        # e.with_traceback() call raised TypeError for its missing argument.
        gunicorn_logger.exception(f"Error in incoming_audio: {e}")
async def send_captions(client_id, username, original_text, translated_text, call_id):
    """Append a caption entry to the call record identified by ``call_id``.

    Returns whatever ``update_captions`` returns for the call.
    """
    # BO -> Update Call Record with Callee field based on call_id
    print(f"Now updating Caption field in call record for Caller with ID: {client_id} for call: {call_id}")

    caption = {
        key: str(value)
        for key, value in (
            ("author_id", client_id),
            ("author_username", username),
            ("original_text", original_text),
            ("translated_text", translated_text),
        )
    }
    caption["timestamp"] = str(datetime.now())

    return update_captions(get_collection_calls(), get_collection_users(), call_id, caption)
# Serve socket.io from the same ASGI application.
app.mount("/", socketio_app)

if __name__ == '__main__':
    # Set the level before serving: uvicorn.run blocks until shutdown, so a
    # level assigned after it would never apply while the server is running.
    fastapi_logger.setLevel(logging.DEBUG)
    uvicorn.run("main:app", host='0.0.0.0', port=7860, log_level="info")
else:
    # Running in Docker Container
    fastapi_logger.setLevel(gunicorn_logger.level)

backend/mongodb/endpoints/__pycache__/calls.cpython-310.pyc ADDED Viewed

Binary file (3.77 kB). View file

backend/mongodb/endpoints/__pycache__/users.cpython-310.pyc ADDED Viewed

Binary file (2.01 kB). View file

backend/mongodb/endpoints/calls.py ADDED Viewed

	@@ -0,0 +1,74 @@

+from fastapi import APIRouter, Body, Request, status, HTTPException
+from typing import List
+from datetime import date
+import sys
+from ..operations import calls as calls
+from ..models.calls import UserCaptions, UserCall, UpdateCall
+from ..endpoints.users import get_collection_users
router = APIRouter(prefix="/call",
    tags=["Calls"])


def get_collection_calls(request: Request):
    """Return the ``call_records`` collection from the app's database.

    Raises:
        HTTPException: 404 when the collection cannot be obtained from the app.
    """
    try:
        return request.app.database["call_records"]
    except Exception as err:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit pass through.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Unable to find call records Database.") from err
@router.post(
    "/create-call",
    response_description="Create a new user call record",
    status_code=status.HTTP_201_CREATED,
    response_model=UserCall,
)
async def create_calls(request: Request, user_calls: UserCall = Body(...)):
    # Insert the posted call record into the call_records collection.
    return calls.create_calls(get_collection_calls(request), user_calls)
# Look up a single call record by its call ID. The response description used to
# be copied from the per-user lookup and described the wrong operation.
@router.get("/find-call/{call_id}", response_description="Find a call based on Call ID", response_model=UserCall)
async def find_call(request: Request, call_id: str):
    collection = get_collection_calls(request)
    return calls.find_call(collection, call_id)
@router.get(
    "/find-user-calls/{user_id}",
    response_description="Find user's calls based on User ID",
    response_model=List[UserCall],
)
async def find_user_calls(request: Request, user_id: str):
    # List every call in which the user is the caller or the callee.
    return calls.find_user_calls(get_collection_calls(request), user_id)
# Return the call's captions joined into one text for the given user. The
# response description used to be copied from the per-user call lookup.
@router.get("/get-captions/{call_id}/{user_id}", response_description="Get the combined caption text of a call for a user")
async def get_caption_text(request: Request, call_id: str, user_id: str):
    collection = get_collection_calls(request)
    return calls.get_caption_text(collection, call_id, user_id)
@router.get(
    "/find-duration/{min_len}/{max_len}",
    response_description="Find calls based on call duration in minutes",
    response_model=List[UserCall],
)
async def list_transcripts_by_duration(request: Request, min_len: int, max_len: int):
    # NOTE(review): confirm that the operations module still provides
    # list_transcripts_by_duration before relying on this route.
    call_records = get_collection_calls(request)
    return calls.list_transcripts_by_duration(call_records, min_len, max_len)
@router.put(
    "/update-call/{call_id}",
    response_description="Update an existing call",
    response_model=UpdateCall,
)
async def update_calls(request: Request, call_id: str, user_calls: UpdateCall = Body(...)):
    # Apply the posted fields to the call record with the given call ID.
    return calls.update_calls(get_collection_calls(request), call_id, user_calls)
# Append a caption entry to an existing call. The response description used to
# be copied from the call update route.
@router.put("/update-captions/{call_id}", response_description="Add a caption to an existing call", response_model=UpdateCall)
async def update_captions(request: Request, call_id: str, user_calls: UserCaptions = Body(...)):
    call_collection = get_collection_calls(request)
    user_collection = get_collection_users(request)
    return calls.update_captions(call_collection, user_collection, call_id, user_calls)
@router.delete(
    "/delete-call/{call_id}",
    response_description="Delete a call by its id",
)
async def delete_call(request: Request, call_id: str):
    # Remove the call record with the given call ID.
    return calls.delete_calls(get_collection_calls(request), call_id)
@router.get(
    "/fuzzy-search/{user_id}/{query}",
    response_description="Perform fuzzy text search on caption fields",
    response_model=List[UserCall],
)
async def fuzzy_search(request: Request, user_id: str, query: str):
    # Search the user's calls for captions approximately matching the query.
    call_records = get_collection_calls(request)
    return calls.fuzzy_search(call_records, user_id, query)
@router.get(
    "/summarise/{call_id}/{user_id}/{target_language}",
    response_description="Perform gpt-3.5 summarisation on call_id",
)
async def summarise(request: Request, call_id: str, user_id: str, target_language: str):
    # Summarise the call's captions for the user in the requested language.
    call_records = get_collection_calls(request)
    return calls.summarise(call_records, call_id, user_id, target_language)
@router.get(
    "/term-extraction/{call_id}/{user_id}/{target_language}",
    response_description="Perform key term extraction on call record",
)
async def term_extraction(request: Request, call_id: str, user_id: str, target_language: str):
    # Extract key terms from the call's captions for the given user.
    call_records = get_collection_calls(request)
    return calls.term_extraction(call_records, call_id, user_id, target_language)

backend/mongodb/endpoints/users.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from fastapi import APIRouter, Body, Request, status, HTTPException
+from typing import List
+import sys
+from ..models.users import User, UpdateUser
+from ..operations import users as users
router = APIRouter(prefix="/user", tags=["User"])


def get_collection_users(request: Request):
    """Return the ``user_records`` collection from the app's database."""
    return request.app.database["user_records"]
@router.post(
    "/",
    response_description="Create a new user",
    status_code=status.HTTP_201_CREATED,
    response_model=User,
)
async def create_user(request: Request, user: User = Body(...)):
    # Insert the posted user into the user_records collection.
    return users.create_user(get_collection_users(request), user)
@router.get(
    "/",
    response_description="List users",
    response_model=List[User],
)
async def list_users(request: Request):
    # The limit passed to the operation is fixed at 100.
    user_records = get_collection_users(request)
    return users.list_users(user_records, 100)
@router.put(
    "/{user_id}",
    response_description="Update a User",
    response_model=UpdateUser,
)
async def update_user(request: Request, user_id: str, user: UpdateUser = Body(...)):
    # Apply the posted fields to the user with the given ID.
    return users.update_user(get_collection_users(request), user_id, user)
@router.get(
    "/{user_id}",
    response_description="Get a single user by id",
    response_model=User,
)
async def find_user(request: Request, user_id: str):
    # Fetch one user record by its ID.
    return users.find_user(get_collection_users(request), user_id)
@router.get(
    "/find-name-id/{user_id}",
    response_description="Get a username from user id",
)
async def find_name_from_id(request: Request, user_id: str):
    # Resolve a user ID to the stored username.
    user_records = get_collection_users(request)
    return users.find_name_from_id(user_records, user_id)
@router.delete(
    "/{user_id}",
    response_description="Delete a user",
)
async def delete_user(request: Request, user_id: str):
    # Remove the user record with the given ID.
    return users.delete_user(get_collection_users(request), user_id)

backend/mongodb/models/__pycache__/calls.cpython-310.pyc ADDED Viewed

Binary file (3.01 kB). View file

backend/mongodb/models/__pycache__/users.cpython-310.pyc ADDED Viewed

Binary file (1.52 kB). View file

backend/mongodb/models/calls.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import uuid
+from typing import List, Dict, Optional
+from datetime import datetime
+from pydantic import BaseModel, Field, PrivateAttr
+import sys
class UserCaptions(BaseModel):
    """Class for storing captions generated by SeamlessM4T."""

    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4)  # private attr not included in http calls
    author_id: Optional[str] = None
    author_username: Optional[str] = None
    original_text: str
    translated_text: str
    # Defaults to the local time at which the model instance is created.
    timestamp: datetime = Field(default_factory=datetime.now)

    class Config:
        populate_by_name = True
        json_schema_extra = {
            "example": {
                "author_id": "gLZrfTwXyLUPB3eT7xT2HZnZiZT2",
                "author_username": "shamzino",
                "original_text": "eng: This is original_text english text",
                "translated_text": "spa: este es el texto traducido al español",
                "timestamp": "2024-03-28T16:15:50.956055",
            }
        }
class UserCall(BaseModel):
    """Class for storing past call records from users."""

    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4)
    call_id: Optional[str] = None
    caller_id: Optional[str] = None
    callee_id: Optional[str] = None
    # Exposed under the alias "date"; defaults to the local creation time.
    creation_date: datetime = Field(default_factory=datetime.now, alias="date")
    captions: Optional[List[UserCaptions]] = None
    # Both mappings are keyed by user id in the example below.
    key_terms: Optional[dict] = None
    summaries: Optional[dict] = None

    class Config:
        populate_by_name = True
        json_schema_extra = {
            "example": {
                "call_id": "65eef930e9abd3b1e3506906",
                "caller_id": "65ede65b6d246e52aaba9d4f",
                "callee_id": "65edda944340ac84c1f00758",
                "captions": [{"author_id": "gLZrfTwXyLUPB3eT7xT2HZnZiZT2", "author_username": "shamzino", "original_text": "eng: This is original_text english text", "translated_text": "spa: este es el texto traducido al español", "timestamp": "2024-03-28T16:15:50.956055"},
                             {"author_id": "g7pR1qCibzQf5mDP9dGtcoWeEc92", "author_username": "benjino", "original_text": "eng: This is source english text", "translated_text": "spa: este es el texto fuente al español",  "timestamp": "2024-03-28T16:16:20.34625"}],
                "key_terms": {"gLZrfTwXyLUPB3eT7xT2HZnZiZT2": ["original_text", "source", "english", "text"], "g7pR1qCibzQf5mDP9dGtcoWeEc92": ["translated_text", "destination", "spanish", "text"]},
                "summaries": {"gLZrfTwXyLUPB3eT7xT2HZnZiZT2": "This is a short test on lanuguage translation", "65edda944340ac84c1f00758": "Esta es una breve prueba sobre traducción de idiomas."}
            }
        }
class UpdateCall(BaseModel):
    """Class for updating User Call record; every field is optional."""

    call_id: Optional[str] = None
    caller_id: Optional[str] = None
    callee_id: Optional[str] = None
    captions: Optional[List[UserCaptions]] = None
    # NOTE(review): UserCall declares key_terms as a dict keyed by user id;
    # confirm whether a list of strings is really intended here.
    key_terms: Optional[List[str]] = None

    class Config:
        populate_by_name = True
        # The example previously used a "duration" field this model does not define.
        json_schema_extra = {
            "example": {
                "callee_id": "65edda944340ac84c1f00758"
            }
        }

backend/mongodb/models/users.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import uuid
+from typing import List, Optional
+from pydantic import BaseModel, Field, SecretStr, PrivateAttr
+from pydantic.networks import EmailStr
class User(BaseModel):
    """Class for user model used to relate users to past calls."""

    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4)  # private attr not included in http calls
    user_id: str
    name: str
    # NOTE(review): confirm whether ``unique``/``index`` are acted upon by
    # pydantic's Field, or whether the database has to enforce them.
    email: EmailStr = Field(unique=True, index=True)

    class Config:
        populate_by_name = True
        json_schema_extra = {
            "example": {
                "user_id": "65ede65b6d246e52aaba9d4f",
                "name": "benjolo",
                "email": "benjolounchained@gmail.com"
            }
        }
class UpdateUser(BaseModel):
    """Class for updating user records; every field is optional."""

    user_id: Optional[str] = None
    name: Optional[str] = None
    email: Optional[EmailStr] = None

    class Config:
        populate_by_name = True
        json_schema_extra = {
            "example": {
                "email": "benjolounchained21@gmail.com"
            }
        }

backend/mongodb/operations/__pycache__/calls.cpython-310.pyc ADDED Viewed

Binary file (5.01 kB). View file

backend/mongodb/operations/__pycache__/users.cpython-310.pyc ADDED Viewed

Binary file (2.89 kB). View file

backend/mongodb/operations/calls.py ADDED Viewed

	@@ -0,0 +1,197 @@

+from fastapi import Body, Request, HTTPException, status
+from fastapi.encoders import jsonable_encoder
+import sys
+from ..models.calls import UpdateCall, UserCall, UserCaptions
+from ..operations.users import *
+from utils.text_rank import extract_terms
+from openai import OpenAI
+from time import sleep
+import os
+from dotenv import dotenv_values
# Used within calls to create call record in main.py
def create_calls(collection, user: UserCall = Body(...)):
    """Insert ``user`` as a new call document and return the stored document."""
    insert_result = collection.insert_one(jsonable_encoder(user))
    return collection.find_one({"_id": insert_result.inserted_id})
def find_call(collection, call_id: str):
    """Finding calls based on call id.

    Raises:
        HTTPException: 404 when no document has the given ``call_id``.
    """
    record = collection.find_one({"call_id": call_id})
    if record is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call with ID: '{call_id}' not found.")
    return record
def find_user_calls(collection, user_id: str):
    """Finding calls based on user id (matched as caller or callee)."""
    # match on caller or callee ID
    either_party = {"$or": [{"caller_id": user_id}, {"callee_id": user_id}]}
    matches = list(collection.find(either_party))
    # return empty list if no existing calls for TranscriptView frontend component
    return matches if matches else []
def update_calls(collection, call_id: str, calls: UpdateCall = Body(...)):
    """Set the non-``None`` fields of ``calls`` on the call with ``call_id``.

    ``calls`` may be a plain dict (socket handlers) or an ``UpdateCall`` model
    (HTTP endpoint).

    Returns:
        The call document after the update.

    Raises:
        HTTPException: 404 when no call with ``call_id`` exists.
    """
    # jsonable_encoder turns a pydantic model into a dict and keeps dicts as
    # dicts, so both caller styles support .items() below.
    changes = {k: v for k, v in jsonable_encoder(calls).items() if v is not None}
    print(changes)

    if changes:
        update_result = collection.update_one({"call_id": call_id}, {"$set": changes})
        # matched_count, not modified_count: re-sending identical values
        # matches the document without modifying it and is not an error.
        if update_result.matched_count == 0:
            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    existing_item = collection.find_one({"call_id": call_id})
    if existing_item is not None:
        return existing_item
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")
def update_captions(call_collection, user_collection, call_id: str, captions: UserCaptions = Body(...)):
    """Append a caption entry to the ``captions`` array of a call.

    ``captions`` may be a plain dict (socket handlers) or a ``UserCaptions``
    model (HTTP endpoint). When it carries an ``author_id``, the author's
    username is looked up and stored as ``author_username``.

    Returns:
        The call document after the update.

    Raises:
        HTTPException: 404 when the call is missing or was not modified.
    """
    # jsonable_encoder turns a pydantic model into a dict and keeps dicts as dicts.
    caption = {k: v for k, v in jsonable_encoder(captions).items() if v is not None}

    # author_id is optional on the model, so only resolve the name when present.
    author_id = caption.get("author_id")
    if author_id is not None:
        caption["author_username"] = find_name_from_id(user_collection, author_id)

    if caption:
        update_result = call_collection.update_one({"call_id": call_id},
                                                   {"$push": {"captions": caption}})
        if update_result.modified_count == 0:
            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not updated!")

    existing_item = call_collection.find_one({"call_id": call_id})
    if existing_item is not None:
        return existing_item
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not found!")
def delete_calls(collection, call_id: str):
    """Delete the call with ``call_id``.

    Returns:
        A confirmation message when exactly one document was deleted.

    Raises:
        HTTPException: 404 otherwise.
    """
    outcome = collection.delete_one({"call_id": call_id})
    if outcome.deleted_count != 1:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")
    return "Call deleted sucessfully!"
def get_caption_text(collection, call_id, user_id):
    """Join a call's captions into one string from ``user_id``'s point of view.

    Captions authored by ``user_id`` contribute their ``original_text``; all
    other captions contribute their ``translated_text``.

    Returns:
        The space-joined text, or ``None`` when the call has no ``captions``
        field or the field is ``None``.
    """
    call_record = find_call(collection, call_id)

    # A record may lack the field or store it as None; both mean "no captions".
    caption_records = call_record.get('captions')
    if caption_records is None:
        return None

    return " ".join(
        segment['original_text'] if segment.get('author_id') == user_id else segment['translated_text']
        for segment in caption_records
    )
# approximate string matching
def fuzzy_search(collection, user_id, query):
    """Return the calls of ``user_id`` matching a fuzzy text search for ``query``.

    Runs a ``$search`` aggregation over all fields and keeps only the
    documents in which ``user_id`` is the caller or the callee.
    """
    # NOTE(review): dropping and recreating every index on each request is
    # expensive; kept because create_index would conflict with a previously
    # created 'captions' index that has different keys.
    collection.drop_indexes()
    collection.create_index(
        # Fixed field name typo: 'captions.tranlated_text'.
        [('captions.original_text', 'text'), ('captions.translated_text', 'text')],
        name='captions',
    )

    pipeline = [
        {
            "$search": {
                "text": {
                    "query": query,
                    "path": {"wildcard": "*"},
                    "fuzzy": {}
                }
            }
        }
    ]

    # .get(): a call that was never answered may have no callee_id field.
    return [
        doc
        for doc in collection.aggregate(pipeline)
        if doc.get('caller_id') == user_id or doc.get('callee_id') == user_id
    ]
def summarise(collection, call_id, user_id, target_language):
    """Summarise a call's captions with gpt-3.5-turbo and store the result.

    The summary is written to ``summaries.<user_id>`` on the call record.

    Returns:
        The summary with any leading ``"<prefix>:"`` removed, or ``None`` when
        there is no caption text or the completion request fails.

    Raises:
        HTTPException: 404 when no call with ``call_id`` exists to update.
    """
    # Prefer the key from .env and fall back to the process environment.
    config = dotenv_values(".env")
    client = OpenAI(api_key=config.get("OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"))

    # get caption text using call_id
    caption_text = get_caption_text(collection, call_id, user_id)
    if not caption_text:
        # Nothing to summarise; avoid sending the literal text "None".
        return None

    prompt = (
        "The following is an extract from a call transcript. "
        f"Rewrite this as a structured, clear summary in {target_language}.\n\n"
        f"Call Transcript: \"\"\"\n{caption_text}\n\"\"\"\n"
    )

    # Gpt-3.5 turbo has 4096 token limit -> request will fail if exceeded
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="gpt-3.5-turbo",
        )
        result = chat_completion.choices[0].message.content
    except Exception as err:
        print(f"Summarisation request failed for call {call_id}: {err}")
        return None
    if not result:
        return None

    # BO - add result to mongodb
    update_result = collection.update_one({"call_id": call_id}, {"$set": {f"summaries.{user_id}": result}})
    # matched_count: storing an identical summary again is not an error.
    if update_result.matched_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    # try parse summary and remove any leading summary prefixes; partition keeps
    # everything after the first colon instead of only the second segment
    _prefix, separator, remainder = result.partition(":")
    return remainder.strip() if separator else result
+def term_extraction(collection, call_id, user_id, target_language):
+    combined_text = get_caption_text(collection, call_id, user_id)
+    if len(combined_text) > 50: # > min_caption_length: -> poor term extraction on short transcripts
+        # Extract Key Terms from Concatenated Caption Field
+        key_terms = extract_terms(combined_text, target_language, len(combined_text))
+        update_result = collection.update_one({"call_id": call_id}, {"$set": {f"key_terms.{user_id}": key_terms}})
+    if update_result.modified_count == 0:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call not updated!")
+    return key_terms

backend/mongodb/operations/users.py ADDED Viewed

	@@ -0,0 +1,76 @@

+from fastapi import Body, Request, HTTPException, status
+from fastapi.encoders import jsonable_encoder
+import sys
+from ..models.users import User, UpdateUser
+from bson import ObjectId
+import re
+def create_user(collection, user: User = Body(...)):
+    user = jsonable_encoder(user)
+    new_user = collection.insert_one(user)
+    created_user = collection.find_one({"_id": new_user.inserted_id})
+    return created_user
+def list_users(collection, limit: int):
+    try:
+        users = list(collection.find(limit = limit))
+        return users
+    except:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"No users found!")
+def find_user(collection, user_id: str):
+    if (user := collection.find_one({"user_id": user_id})):
+        return user
+    else:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")
+def find_name_from_id(collection, user_id: str):
+    # find_one user record based on user id and project for user name
+    if (user_name := collection.find_one({"user_id": user_id}, {"name": 1, "_id": 0})):
+        return user_name['name'] # index name field from single field record returned
+    else:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")
+def find_user_name(collection, name: str):
+    # search for name in lowercase
+    if (user := collection.find_one({"name": re.compile('^' + re.escape(name) + '$', re.IGNORECASE)})):
+        return user
+    else:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with name {name} not found!")
+def find_user_email(collection, email: str):
+    if (user := collection.find_one({"email": re.compile('^' + re.escape(email) + '$', re.IGNORECASE)})):
+        return user
+    else:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with Email Address {email} not found!")
+''' Update user record based on user object/json'''
+def update_user(collection, user_id: str, user: UpdateUser):
+    try:
+        user = {k: v for k, v in user.model_dump().items() if v is not None}
+        if len(user) >= 1:
+            update_result = collection.update_one({"user_id": user_id}, {"$set": user})
+            if update_result.modified_count == 0:
+                raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id: '{user_id}' not found and updated!")
+        if (existing_users := collection.find_one({"user_id": user_id})) is not None:
+            return existing_users
+    except:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id: '{user_id}' not found and updated!")
+def delete_user(collection, user_id: str):
+    try:
+        deleted_user = collection.delete_one({"user_id": user_id})
+        if deleted_user.deleted_count == 1:
+            return f"User with user_id {user_id} deleted sucessfully"
+    except:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")

backend/requirements.txt ADDED Viewed

	@@ -0,0 +1,28 @@

+colorlog==6.8.2
+contextlib2==21.6.0
+fastapi==0.110.1
+g2p_en==2.1.0
+matplotlib==3.7.0
+numpy==1.24.2
+openai==1.20.0
+protobuf==5.26.1
+pydantic==2.7.0
+pydub==0.25.1
+pymongo==4.6.2
+PySoundFile==0.9.0.post1
+python-dotenv==1.0.1
+python-socketio==5.9.0
+pymongo==4.6.2
+Requests==2.31.0
+sentencepiece==0.1.99
+simuleval==1.1.4
+soundfile==0.12.1
+spacy==3.7.4
+pytextrank==3.3.0
+torch==2.1.2
+torchaudio==2.1.2
+#transformers==4.20.1
+uvicorn==0.29.0
+vad==1.0.2
+hf_transfer==0.1.4
+huggingface_hub==0.19.4

backend/routes/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from.routing import router

backend/routes/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (235 Bytes). View file

backend/routes/__pycache__/routing.cpython-310.pyc ADDED Viewed

Binary file (375 Bytes). View file

backend/routes/routing.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from fastapi import APIRouter
+import sys
+from mongodb.endpoints import users, calls
+# Top-level router that aggregates the call and user endpoint routers
+router = APIRouter()
+router.include_router(calls.router)
+router.include_router(users.router)

backend/tests/.pytest_cache/.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Created by pytest automatically.
2	+ *

backend/tests/.pytest_cache/CACHEDIR.TAG ADDED Viewed

	@@ -0,0 +1,4 @@

+Signature: 8a477f597d28d172789f06886806bc55
+# This file is a cache directory tag created by pytest.
+# For information about cache directory tags, see:
+#	https://bford.info/cachedir/spec.html

backend/tests/.pytest_cache/README.md ADDED Viewed

	@@ -0,0 +1,8 @@

+# pytest cache directory #
+This directory contains data from the pytest's cache plugin,
+which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
+**Do not** commit this to version control.
+See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.

backend/tests/.pytest_cache/v/cache/lastfailed ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "test_client.py": true,
+  "unit_test.py::test_create_calls_success": true,
+  "unit_test.py::test_create_calls_failure": true,
+  "test_main.py::test_connect": true,
+  "test_main.py::test_disconnect": true,
+  "test_main.py::test_target_language": true,
+  "test_main.py::test_incoming_audio": true,
+  "test_main.py": true,
+  "unit_test.py::TestClient": true
+}

backend/tests/.pytest_cache/v/cache/nodeids ADDED Viewed

	@@ -0,0 +1,42 @@

+[
+  "integration_test.py::test_extracion_pass2",
+  "integration_test.py::test_extraction_fail",
+  "integration_test.py::test_extraction_pass",
+  "integration_test.py::test_search_fail",
+  "integration_test.py::test_search_pass",
+  "integration_test.py::test_search_pass2",
+  "integration_test.py::test_summary_fail",
+  "integration_test.py::test_summary_fail2",
+  "integration_test.py::test_summary_pass",
+  "integration_test.py::test_summary_pass2",
+  "test_client.py::test_client_add_bytes",
+  "test_client.py::test_client_init",
+  "test_client.py::test_client_resample_and_clear",
+  "test_client.py::test_client_vad",
+  "test_main.py::test_connect",
+  "test_main.py::test_disconnect",
+  "test_main.py::test_incoming_audio",
+  "test_main.py::test_target_language",
+  "unit_test.py::test_create_call_pass",
+  "unit_test.py::test_create_calls_failure",
+  "unit_test.py::test_create_calls_success",
+  "unit_test.py::test_create_user_pass",
+  "unit_test.py::test_delete_user_fail",
+  "unit_test.py::test_delete_user_pass",
+  "unit_test.py::test_find_call_fail",
+  "unit_test.py::test_find_call_pass",
+  "unit_test.py::test_find_name_id_fail",
+  "unit_test.py::test_find_name_id_pass",
+  "unit_test.py::test_find_user_call_fail",
+  "unit_test.py::test_find_user_call_pass",
+  "unit_test.py::test_find_user_fail",
+  "unit_test.py::test_find_user_pass",
+  "unit_test.py::test_get_captions_fail",
+  "unit_test.py::test_get_captions_pass",
+  "unit_test.py::test_root_pass",
+  "unit_test.py::test_update_call_fail",
+  "unit_test.py::test_update_call_pass",
+  "unit_test.py::test_update_caption_pass",
+  "unit_test.py::test_update_user_fail",
+  "unit_test.py::test_update_user_pass"
+]

backend/tests/.pytest_cache/v/cache/stepwise ADDED Viewed

	@@ -0,0 +1 @@


1	+ []

backend/tests/__init__.py ADDED Viewed

File without changes

backend/tests/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (196 Bytes). View file

backend/tests/__pycache__/integration_test.cpython-310-pytest-8.1.1.pyc ADDED Viewed

Binary file (3.98 kB). View file

backend/tests/__pycache__/test_client.cpython-310-pytest-8.1.1.pyc ADDED Viewed

Binary file (6.95 kB). View file

backend/tests/__pycache__/test_main.cpython-310-pytest-8.1.1.pyc ADDED Viewed

Binary file (3.92 kB). View file

backend/tests/__pycache__/test_main.cpython-310.pyc ADDED Viewed

Binary file (2.2 kB). View file

backend/tests/__pycache__/unit_test.cpython-310-pytest-8.1.1.pyc ADDED Viewed

Binary file (6.16 kB). View file

backend/tests/integration_test.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import os
+from dotenv import dotenv_values
+from fastapi import FastAPI
+from pymongo import MongoClient
+from main import requests
+import uuid
+import pytest
+from dotenv import load_dotenv
+import requests
+import json
+# Test Fuzzy Search Integrated component on existing call records
+def test_search_pass():
+     # Test against records with mention of 'Football'
+    response = requests.get("http://127.0.0.1:8080/call/fuzzy-search/ozpHhyum3sayTdxIKUAtF51uvWJ2/football")
+    assert response.status_code == 200
+    assert len(response.json()) == 3  # three matching call transcripts
+# Test Fuzzy Search Integrated component on existing call records
+def test_search_pass2():
+     # Test against records with mention of 'Football' mispelled as 'Footbll'
+    response = requests.get("http://127.0.0.1:8080/call/fuzzy-search/ozpHhyum3sayTdxIKUAtF51uvWJ2/footbll")
+    assert response.status_code == 200
+    assert len(response.json()) == 3  # still three matching call transcripts
+# Test Fuzzy Search Integrated component on existing call records
+def test_search_fail():
+     # Test against records with mention of 'Football
+    response = requests.get("http://127.0.0.1:8080/call/fuzzy-search/ozpHhyum3sayTdxIKUAtF51uvWJ2/basketball")
+    assert response.status_code == 200
+    assert len(response.json()) == 0  # no matching call transcripts
+# Test Summarisation Integrated component on existing call records
+def test_summary_pass():
+     # Test with summarisation of english version transcript
+    response = requests.get("http://127.0.0.1:8080//call/summarise/FCnORXmLkw48G5mgscBV/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng")
+    assert response.status_code == 200
+def test_summary_pass2():
+     # Test with summarisation of polish version transcript
+    response = requests.get("http://127.0.0.1:8080//call/summarise/FCnORXmLkw48G5mgscBV/fNGMkWoSK7fxwE3tbp8E816sthd2/pol")
+    assert response.status_code == 200
+def test_summary_fail():
+     # Test with summarisation of english version transcript
+    response = requests.get("http://127.0.0.1:8080//call/summarise/falseID/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng") # non exising call record
+    assert response.status_code == 404
+def test_summary_fail2():
+     # Test with summarisation of english version transcript
+    response = requests.get("http://127.0.0.1:8080//call/summarise/FCnORXmLkw48G5mgscBV/falseID/eng") # non exising user record
+    assert response.status_code == 404
+# Test Key Key Extraction Integrated component on existing call records
+def test_extraction_pass():
+     # Test against records with mention of 'Football'
+    response = requests.get("http://127.0.0.1:8080//call/term-extraction/FCnORXmLkw48G5mgscBV/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng")
+    assert response.status_code == 200
+    assert len(response.json()) == 3  # still three matching call transcripts
+# Test Fuzzy Search Integrated component on existing call records
+def test_extracion_pass2():
+     # Test against records with mention of 'Football' mispelled as 'Footbll'
+    response = requests.get("http://127.0.0.1:8080//call/term-extraction/FCnORXmLkw48G5mgscBV/fNGMkWoSK7fxwE3tbp8E816sthd2/pol")
+    assert response.status_code == 200
+    assert len(response.json()) == 3  # still three matching call transcripts
+# Test Fuzzy Search Integrated component on existing call records
+def test_extraction_fail():
+     # Test against records with mention of 'Football
+    response = requests.get("http://127.0.0.1:8080//call/term-extraction/FCnORXmLkw48G5mgscBV/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng")
+    assert response.status_code == 200
+    assert len(response.json()) == 0  # no matching call transcripts

backend/tests/silence.wav ADDED Viewed

Binary file (302 kB). View file

backend/tests/speaking.wav ADDED Viewed

Binary file (255 kB). View file

backend/tests/test_client.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import os
+import wave
+import pytest
+import torchaudio
+import os
+import sys
+current_dir = os.path.dirname(os.path.abspath(__file__))
+parent_dir = os.path.dirname(current_dir)
+sys.path.append(parent_dir)
+from Client import Client
+@pytest.fixture
+def mock_client():
+    client = Client("test_sid", "test_client_id", "testusername", original_sr=44100)
+    return client
+def test_client_init(mock_client):
+    assert mock_client.sid == "test_sid"
+    assert mock_client.client_id == "test_client_id"
+    assert mock_client.call_id == None
+    assert mock_client.buffer == bytearray()
+    assert mock_client.output_path == "test_sid_output_audio.wav"
+    assert mock_client.target_language == None
+    assert mock_client.original_sr == 44100
+    assert mock_client.vad.sample_rate == 16000
+    assert mock_client.vad.frame_length == 25
+    assert mock_client.vad.frame_shift == 20
+    assert mock_client.vad.energy_threshold == 0.05
+    assert mock_client.vad.pre_emphasis == 0.95
+def test_client_add_bytes(mock_client):
+    mock_client.add_bytes(b"test")
+    assert mock_client.buffer == b"test"
+def test_client_resample_and_clear(mock_client):
+    location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+    speaking_bytes = wave.open(location + "/speaking.wav", "rb").readframes(-1)
+    mock_client.add_bytes(speaking_bytes)
+    resampled_waveform = mock_client.resample_and_clear()
+    torchaudio.save(location + "testoutput.wav", resampled_waveform, 16000)
+    with wave.open(location + "testoutput.wav", "rb") as wf:
+        sample_rate = wf.getframerate()
+    assert mock_client.buffer == bytearray()
+    assert sample_rate == 16000
+def test_client_vad(mock_client):
+    location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+    speaking_bytes = wave.open(location + "/speaking.wav", "rb").readframes(-1)
+    mock_client.add_bytes(speaking_bytes)
+    resampled_waveform = mock_client.resample_and_clear()
+    assert mock_client.buffer == bytearray()
+    assert mock_client.vad_analyse(resampled_waveform) == True
+    silent_bytes = wave.open(location + "/silence.wav", "rb").readframes(-1)
+    mock_client.add_bytes(silent_bytes)
+    resampled_waveform = mock_client.resample_and_clear()
+    assert mock_client.buffer == bytearray()
+    assert mock_client.vad_analyse(resampled_waveform) == False

backend/tests/test_main.py ADDED Viewed

	@@ -0,0 +1,86 @@

+from dotenv import dotenv_values
+from fastapi import FastAPI
+from pymongo import MongoClient
+import pytest
+from unittest.mock import AsyncMock, MagicMock, ANY
+import socketio
+import os
+import sys
+current_dir = os.path.dirname(os.path.abspath(__file__))
+parent_dir = os.path.dirname(current_dir)
+sys.path.append(parent_dir)
+from Client import Client
+from main import sio, connect, disconnect, target_language, call_user, answer_call, incoming_audio, clients, rooms, app
+from unittest.mock import patch
+# Module-level test setup: builds a Socket.IO server object and attaches a MongoDB client to `app`.
+# NOTE(review): this rebinds the `sio` name imported from `main` above; the handlers imported from
+# `main` presumably keep using main's own server - confirm the shadowing is intended.
+sio = socketio.AsyncServer(
+    async_mode="asgi",
+    cors_allowed_origins="*",
+    # engineio_logger=logger,
+)
+config = dotenv_values(".env")
+# Read connection string from environment vars
+# uri = os.environ['MONGODB_URI']
+# Read connection string from .env file
+uri = config['MONGODB_URI']
+app.mongodb_client = MongoClient(uri)
+app.database = app.mongodb_client['IT-Cluster1'] #connect to interpretalk primary db
+# Ping the server so a connection problem is printed at import time rather than inside a test
+try:
+    app.mongodb_client.admin.command('ping')
+    print("MongoDB Connection Established...")
+except Exception as e:
+    print(e)
+# shutdown logic
+# NOTE(review): only the message is printed here, at import time; the client itself is closed
+# inside test_connect further down.
+print("Closing MongoDB Connection...")
+@pytest.fixture(autouse=True)
+def setup_clients_and_rooms():
+    global clients, rooms
+    clients.clear()
+    rooms.clear()
+    yield
+@pytest.fixture
+def mock_client():
+    client = Client("test_sid", "test_client_id", "testusername", original_sr=44100)
+    return client
+@pytest.mark.asyncio
+async def test_connect(mock_client):
+    sid = mock_client.sid
+    environ = {'QUERY_STRING': 'client_id=test_client_id'}
+    await connect(sid, environ)
+    app.mongodb_client.close()
+    assert sid in clients
+@pytest.mark.asyncio
+async def test_disconnect(mock_client):
+    sid = mock_client.sid
+    clients[sid] = mock_client
+    await disconnect(sid)
+    assert sid not in clients
+@pytest.mark.asyncio
+async def test_target_language(mock_client):
+    sid = mock_client.sid
+    clients[sid] = mock_client
+    target_lang = "fr"
+    await target_language(sid, target_lang)
+    assert clients[sid].target_language == "fr"
+@pytest.mark.asyncio
+async def test_incoming_audio(mock_client):
+    sid = mock_client.sid
+    clients[sid] = mock_client
+    data = b"\x01"
+    call_id = "1234"
+    await incoming_audio(sid, data, call_id)
+    assert clients[sid].get_length() != 0

backend/tests/unit_test.py ADDED Viewed

	@@ -0,0 +1,277 @@

+import os
+from dotenv import dotenv_values
+from fastapi import FastAPI
+from pymongo import MongoClient
+from main import requests
+import uuid
+import pytest
+from dotenv import load_dotenv
+import requests
+import json
+# Test Root endpoint
+def test_root_pass():
+    response = requests.get("http://127.0.0.1:8080/")
+    assert response.status_code == 200
+    assert response.json() == {"message": "Welcome to InterpreTalk!"}
+# POST /user/
+# Test DB user record creation including response validation
+def test_create_user_pass():
+    payload = {
+        "name": "Tester1",
+        "user_id": "testerID",
+        "email": "tester1@gmail.com"
+    }
+    response = requests.post("http://127.0.0.1:8080/user/", json=payload)
+    assert response.status_code == 201
+'''Test User Endpoints'''
+# GET  /user/
+# Test finding DB user record based on user ID
+def test_find_user_pass():
+    response = requests.get("http://localhost:8080/user/ozpHhyum3sayTdxIKUAtF51uvWJ2")  # existing user record
+    assert response.status_code == 200
+    assert response.json() == {
+                "user_id": "ozpHhyum3sayTdxIKUAtF51uvWJ2",
+                "name": "Benjamin",
+                "email": "benjolounchained@gmail.com"
+    }
+def test_find_user_fail():
+    response = requests.get(f"http://127.0.0.1:8080/users/fakeID")  # non-existing user record
+    # check if response is inteded error code
+    assert response.status_code == 404
+# PUT /user/{user_id}
+# Updating DB user record based on user ID
+def test_update_user_pass():
+    payload = {
+        "name": "TesterNewName"
+    }
+    response = requests.patch(f"http://127.0.0.1:8080/users/testerID", json=payload)
+    assert response.status_code == 202
+    assert response.json() == {
+        "name": "TesterNewName",
+        "user_id": "testerID",
+        "email": "tester1@gmail.com"
+    }
+# Test with non-existing user ID
+def test_update_user_fail():
+    payload = {
+        "name": "TesterNewName"
+    }
+    response = requests.patch(f"http://127.0.0.1:8080/users/falseID", json=payload)
+    assert response.status_code == 404
+# DELETE /user/{user_id}
+def test_delete_user_pass():
+    response = requests.delete(f"http://127.0.0.1:8080/users/testerID")
+    assert response.status_code == 200
+def test_delete_user_fail():
+    response = requests.delete(f"http://127.0.0.1:8080/users/fakeID")
+    assert response.status_code == 404
+# GET /user/find-name-id/{user_ud}
+def test_find_name_id_pass():
+    response = requests.get("http://127.0.0.1:8080/user/find-name-id/ozpHhyum3sayTdxIKUAtF51uvWJ2")
+    assert response.status_code == 201
+    assert response.json == {
+        'name': "Benjamin"
+    }
+def test_find_name_id_fail():
+    response = requests.get("http://127.0.0.1:8080/user/find-name-id/falseID")
+    assert response.status_code == 404
+'''Test Call endpoints'''
+# POST /call/create-call
+# Test creating call record
+def test_create_call_pass():
+    payload = {
+        "call_id": "test001",
+        "caller_id": "tester01",
+        "callee_id": "tester02",
+        "captions": [
+            {
+            "author_id": "tester01",
+            "author_username": "tester",
+            "original_text": "It is a test",
+            "translated_text": "Es un prueba",
+            }
+        ]
+    }
+    response = requests.post("http://127.0.0.1:8080/call/create-call", json=payload)
+    assert response.status_code == 201
+# GET  /call/find-call
+# Test finding DB call record based on call ID
+def test_find_call_pass():
+    response = requests.get(f"http://127.0.0.1:8080/call/test001")  # existing user record
+    assert response.status_code == 200
+    assert response.json() == {
+        "call_id": "test001",
+        "caller_id": "tester01",
+        "callee_id": "tester02",
+        "captions": [
+            {
+            "author_id": "tester01",
+            "author_username": "tester",
+            "original_text": "It is a test",
+            "translated_text": "Es un prueba",
+            }
+        ]
+    }
+def test_find_call_fail():
+    response = requests.get(f"http://127.0.0.1:8080/call/fakeID")  # non-existing user record
+    # check if response is inteded error code
+    assert response.status_code == 404
+# GET  /call/find-user-call
+# Test finding DB call record based on user ID
+def test_find_user_call_pass():
+    response = requests.get(f"http://127.0.0.1:8080/call/find-user-calls/tester01")  # existing user record
+    assert response.status_code == 200
+    assert response.json() == {
+        "call_id": "test001",
+        "caller_id": "tester01",
+        "callee_id": "tester02",
+        "captions": [
+            {
+            "author_id": "tester01",
+            "author_username": "tester",
+            "original_text": "It is a test",
+            "translated_text": "Es un prueba",
+            }
+        ]
+    }
+def test_find_user_call_fail():
+    response = requests.get(f"http://127.0.0.1:8080/calls/fakeID")  # non-existing user record
+    # check if response is inteded error code
+    assert response.status_code == 404
+# GET  /call/get-captions
+# Test finding DB call record based on user ID
+def test_get_captions_pass():
+    response = requests.get(f"http://127.0.0.1:8080/call/find-user-calls/test001/tester01")  # existing user record
+    assert response.status_code == 200
+    assert response.json() == {
+        "call_id": "test001",
+        "caller_id": "tester01",
+        "callee_id": "tester02",
+        "captions": [
+            {
+            "author_id": "tester01",
+            "author_username": "tester",
+            "original_text": "It is a test",
+            "translated_text": "Es un prueba",
+            }
+        ]
+    }
+def test_get_captions_fail():
+    response = requests.get(f"http://127.0.0.1:8080/call/find-user-calls/test001/tester00")  # fake user record
+    # check if response is inteded error code
+    assert response.status_code == 404
+def test_get_captions_fail():
+    response = requests.get(f"http://127.0.0.1:8080/call/find-user-calls/test000/tester01")  # fake call record
+    # check if response is inteded error code
+    assert response.status_code == 404
+# GET /call/update-call/{call_id}
+# test updating call record based on id
+def test_update_call_pass():
+    payload = {
+        "callee_id": "TesterNewName"
+    }
+    response = requests.patch(f"http://127.0.0.1:8080/call/update-call/tester02", json=payload)
+    assert response.status_code == 202
+    assert response.json() == {
+        "call_id": "test001",
+        "caller_id": "tester01",
+        "callee_id": "tester02",
+        "captions": [
+            {
+            "author_id": "tester01",
+            "author_username": "tester",
+            "original_text": "It is a test",
+            "translated_text": "Es un prueba",
+            }
+        ]
+    }
+# Test with non-existing user ID
+def test_update_call_fail():
+    payload = {
+        "callee_id": "testName"
+    }
+    response = requests.patch(f"http://127.0.0.1:8080/users/falseID", json=payload)
+    assert response.status_code == 404
+# GET /call/update-captions/{call_id}
+# test updating caption record based on id
+def test_update_caption_pass():
+    payload = {
+        "author_username": "testerNew"
+    }
+    response = requests.patch(f"http://127.0.0.1:8080/call/update-caption/tester01", json=payload)
+    assert response.status_code == 202
+# Test with non-existing user ID
+def test_update_call_fail():
+    payload = {
+        "callee_id": "testName"
+    }
+    response = requests.patch(f"http://127.0.0.1:8080/update-caption/falseID", json=payload)
+    assert response.status_code == 404
+# DELETE /call/delete-call/{call_id}
+def test_delete_user_pass():
+    response = requests.delete(f"http://127.0.0.1:8080//call/delete-call/test001")
+    assert response.status_code == 200
+def test_delete_user_fail():
+    response = requests.delete(f"http://127.0.0.1:8080//call/delete-call/test009")
+    assert response.status_code == 404

backend/utils/__pycache__/text_rank.cpython-310.pyc ADDED Viewed

Binary file (2.03 kB). View file

backend/utils/text_rank.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import spacy
+import pytextrank
+from spacy.tokens import Span
+# Define decorator for converting to singular version of words
+@spacy.registry.misc("plural_scrubber")
+def plural_scrubber():
+    def scrubber_func(span: Span) -> str:
+        return span.lemma_
+    return scrubber_func
+def model_selector(target_language: str):
+    # Load subset of non-english models
+    language_model = {
+        "spa": "es_core_news_sm",
+        "fra": "fr_core_news_sm",
+        "pol": "pl_core_news_sm",
+        "deu": "de_core_news_sm",
+        "ita": "it_core_news_sm",
+        "por": "pt_core_news_sm",
+        "nld": "nl_core_news_sm",
+        "fin": "fi_core_news_sm",
+        "ron": "ro_core_news_sm",
+        "rus": "ru_core_news_sm"
+    }
+    try:
+        nlp = spacy.load(language_model[target_language])
+    except KeyError:
+        # Load a spaCy English model
+        nlp = spacy.load("en_core_web_lg")
+    # Add TextRank component to pipeline with stopwords
+    nlp.add_pipe("textrank", config={
+                            "stopwords": {token:["NOUN"] for token in nlp.Defaults.stop_words},
+                            "scrubber": {"@misc": "plural_scrubber"}})
+    return nlp
+def extract_terms(text, target_language, length):
+    nlp = model_selector(target_language)
+    # Perform fact extraction on overall summary and segment summaries
+    doc = nlp(text)
+    if length < 100:
+        # Get single most used key term
+        phrases = {phrase.text for phrase in doc._.phrases[:1]}
+    elif length > 100 and length < 300:
+        # Create unique set from top 2 ranked phrases
+        phrases = {phrase.text for phrase in doc._.phrases[:2]}
+    if length > 300:
+        # Create unique set from top 3 ranked phrases
+        phrases = {phrase.text for phrase in doc._.phrases[:3]}
+    return list(phrases)