benjolo commited on
Commit
ddc5bbd
1 Parent(s): 78289fa

Uploading completed backend

Browse files
Files changed (45) hide show
  1. backend/.DS_Store +0 -0
  2. backend/.gitignore +2 -0
  3. backend/Client.py +81 -0
  4. backend/__pycache__/Client.cpython-310.pyc +0 -0
  5. backend/__pycache__/main.cpython-310.pyc +0 -0
  6. backend/logging.yaml +22 -0
  7. backend/main.py +344 -0
  8. backend/mongodb/endpoints/__pycache__/calls.cpython-310.pyc +0 -0
  9. backend/mongodb/endpoints/__pycache__/users.cpython-310.pyc +0 -0
  10. backend/mongodb/endpoints/calls.py +74 -0
  11. backend/mongodb/endpoints/users.py +43 -0
  12. backend/mongodb/models/__pycache__/calls.cpython-310.pyc +0 -0
  13. backend/mongodb/models/__pycache__/users.cpython-310.pyc +0 -0
  14. backend/mongodb/models/calls.py +72 -0
  15. backend/mongodb/models/users.py +37 -0
  16. backend/mongodb/operations/__pycache__/calls.cpython-310.pyc +0 -0
  17. backend/mongodb/operations/__pycache__/users.cpython-310.pyc +0 -0
  18. backend/mongodb/operations/calls.py +197 -0
  19. backend/mongodb/operations/users.py +76 -0
  20. backend/requirements.txt +28 -0
  21. backend/routes/__init__.py +1 -0
  22. backend/routes/__pycache__/__init__.cpython-310.pyc +0 -0
  23. backend/routes/__pycache__/routing.cpython-310.pyc +0 -0
  24. backend/routes/routing.py +7 -0
  25. backend/tests/.pytest_cache/.gitignore +2 -0
  26. backend/tests/.pytest_cache/CACHEDIR.TAG +4 -0
  27. backend/tests/.pytest_cache/README.md +8 -0
  28. backend/tests/.pytest_cache/v/cache/lastfailed +11 -0
  29. backend/tests/.pytest_cache/v/cache/nodeids +42 -0
  30. backend/tests/.pytest_cache/v/cache/stepwise +1 -0
  31. backend/tests/__init__.py +0 -0
  32. backend/tests/__pycache__/__init__.cpython-310.pyc +0 -0
  33. backend/tests/__pycache__/integration_test.cpython-310-pytest-8.1.1.pyc +0 -0
  34. backend/tests/__pycache__/test_client.cpython-310-pytest-8.1.1.pyc +0 -0
  35. backend/tests/__pycache__/test_main.cpython-310-pytest-8.1.1.pyc +0 -0
  36. backend/tests/__pycache__/test_main.cpython-310.pyc +0 -0
  37. backend/tests/__pycache__/unit_test.cpython-310-pytest-8.1.1.pyc +0 -0
  38. backend/tests/integration_test.py +98 -0
  39. backend/tests/silence.wav +0 -0
  40. backend/tests/speaking.wav +0 -0
  41. backend/tests/test_client.py +59 -0
  42. backend/tests/test_main.py +86 -0
  43. backend/tests/unit_test.py +277 -0
  44. backend/utils/__pycache__/text_rank.cpython-310.pyc +0 -0
  45. backend/utils/text_rank.py +60 -0
backend/.DS_Store ADDED
Binary file (6.15 kB). View file
 
backend/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ myenv
2
+ .pytest_cache
backend/Client.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+ import wave
3
+ import os
4
+
5
+ import torchaudio
6
+ from vad import EnergyVAD
7
+ TARGET_SAMPLING_RATE = 16000
8
+
9
def create_frames(data: bytes, frame_duration: int) -> Tuple[bytes]:
    """Split ``data`` into consecutive fixed-size slices.

    The slice length is ``TARGET_SAMPLING_RATE * frame_duration / 1000``
    truncated to an int; the last slice may be shorter.

    NOTE(review): despite the ``Tuple[bytes]`` annotation, the value returned
    is a ``(generator_of_slices, frame_size)`` pair.
    NOTE(review): the size is computed in samples while ``data`` is sliced in
    bytes -- confirm this is intended for multi-byte sample formats.
    """
    samples_per_frame = int(TARGET_SAMPLING_RATE * (frame_duration / 1000))
    offsets = range(0, len(data), samples_per_frame)
    chunks = (data[start:start + samples_per_frame] for start in offsets)
    return chunks, samples_per_frame
12
+
13
def detect_activity(energies: list, min_run: int = 12, density_divisor: float = 12):
    """Decide whether a sequence of per-frame activity flags contains speech.

    Args:
        energies: Per-frame flags where ``1`` marks an active frame.
        min_run: Number of consecutive active frames required to report
            activity. Defaults to 12, the value previously hard-coded.
        density_divisor: The input is rejected outright when fewer than
            ``len(energies) / density_divisor`` frames are active. Defaults
            to 12, the value previously hard-coded.

    Returns:
        ``True`` when the density check passes and a run of at least
        ``min_run`` consecutive active frames exists, otherwise ``False``.
    """
    # Cheap rejection first: too few active frames overall.
    if sum(energies) < len(energies) / density_divisor:
        return False

    run_length = 0
    for flag in energies:
        if flag == 1:
            run_length += 1
            if run_length >= min_run:
                return True
        else:
            # Any inactive frame breaks the current run.
            run_length = 0
    return False
25
+
26
class Client:
    """Per-connection audio buffer with resampling and voice-activity helpers.

    Raw audio bytes are accumulated in ``buffer``. ``resample_and_clear``
    writes them to ``<sid>_OG.wav``, reloads and resamples them to
    ``TARGET_SAMPLING_RATE`` and empties the buffer. Both temporary WAV files
    are removed when the instance is garbage collected.
    """

    def __init__(self, sid, client_id, username, call_id=None, original_sr=None):
        self.sid = sid
        self.client_id = client_id
        # Bug fix: a stray trailing comma used to store a 1-tuple here, so
        # str(client.username) produced "('name',)" instead of the name.
        self.username = username
        self.call_id = call_id
        self.buffer = bytearray()
        self.output_path = self.sid + "_output_audio.wav"
        self.target_language = None
        self.original_sr = original_sr
        self.vad = EnergyVAD(
            sample_rate=TARGET_SAMPLING_RATE,
            frame_length=25,
            frame_shift=20,
            energy_threshold=0.05,
            pre_emphasis=0.95,
        )  # PM - Default values given in the docs for this class

    def add_bytes(self, new_bytes):
        """Append raw audio bytes to the pending buffer."""
        self.buffer += new_bytes

    def resample_and_clear(self):
        """Write the buffer to ``<sid>_OG.wav``, resample it and reset the buffer.

        The WAV header is mono, 2 bytes per sample, at ``original_sr``.

        Returns:
            The waveform loaded by ``torchaudio`` after resampling to
            ``TARGET_SAMPLING_RATE``.
        """
        print(f"📥 [ClientAudioBuffer] Writing {len(self.buffer)} bytes to {self.output_path}")
        with wave.open(self.sid + "_OG.wav", "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(self.original_sr)
            wf.setnframes(0)
            wf.setcomptype("NONE", "not compressed")
            wf.writeframes(self.buffer)
        waveform, sample_rate = torchaudio.load(self.sid + "_OG.wav")
        resampler = torchaudio.transforms.Resample(sample_rate, TARGET_SAMPLING_RATE, dtype=waveform.dtype)
        resampled_waveform = resampler(waveform)
        self.buffer = bytearray()
        return resampled_waveform

    def vad_analyse(self, resampled_waveform):
        """Save the waveform to ``output_path`` and run voice-activity detection.

        Returns:
            The result of ``detect_activity`` on the VAD output.
        """
        torchaudio.save(self.output_path, resampled_waveform, TARGET_SAMPLING_RATE)
        vad_array = self.vad(resampled_waveform)
        # print(f"VAD OUTPUT: {vad_array}")
        return detect_activity(vad_array)

    def write_to_file(self, resampled_waveform):
        """Save the waveform to ``output_path`` at ``TARGET_SAMPLING_RATE``."""
        torchaudio.save(self.output_path, resampled_waveform, TARGET_SAMPLING_RATE)

    def get_length(self):
        """Return the number of bytes currently buffered."""
        return len(self.buffer)

    def __del__(self):
        # __init__ may have failed part-way (or been bypassed), so tolerate
        # missing attributes instead of raising inside the finalizer.
        pending = getattr(self, "buffer", None)
        sid = getattr(self, "sid", None)
        if pending:
            print(f"🚨 [ClientAudioBuffer] Buffer not empty for {sid} ({len(pending)} bytes)!")
        leftovers = [getattr(self, "output_path", None)]
        if isinstance(sid, str):
            leftovers.append(sid + "_OG.wav")
        for path in leftovers:
            if path and os.path.exists(path):
                os.remove(path)
81
+
backend/__pycache__/Client.cpython-310.pyc ADDED
Binary file (3.37 kB). View file
 
backend/__pycache__/main.cpython-310.pyc ADDED
Binary file (8.57 kB). View file
 
backend/logging.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 1
2
+ disable_existing_loggers: false
3
+
4
+ formatters:
5
+ standard:
6
+ format: "%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s"
7
+
8
+ handlers:
9
+ console:
10
+ class: logging.StreamHandler
11
+ formatter: standard
12
+ stream: ext://sys.stdout
13
+
14
+ loggers:
15
+ uvicorn:
16
+ error:
17
+ propagate: true
18
+
19
+ root:
20
+ level: INFO
21
+ handlers: [console]
22
+ propagate: no
backend/main.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from operator import itemgetter
2
+ import os
3
+ from datetime import datetime
4
+ import uvicorn
5
+ from typing import Any, Optional, Tuple, Dict, TypedDict
6
+ from urllib import parse
7
+ from uuid import uuid4
8
+ import logging
9
+ from fastapi.logger import logger as fastapi_logger
10
+ import sys
11
+
12
+ from fastapi import FastAPI
13
+ from fastapi.middleware.cors import CORSMiddleware
14
+ from fastapi import APIRouter, Body, Request, status
15
+ from pymongo import MongoClient
16
+ from dotenv import dotenv_values
17
+ from routes import router as api_router
18
+ from contextlib import asynccontextmanager
19
+ import requests
20
+
21
+ from typing import List
22
+ from datetime import date
23
+ from mongodb.operations.calls import *
24
+ from mongodb.operations.users import *
25
+ from mongodb.models.calls import UserCall, UpdateCall
26
+ # from mongodb.endpoints.calls import *
27
+
28
+ from transformers import AutoProcessor, SeamlessM4Tv2Model
29
+
30
+ # from seamless_communication.inference import Translator
31
+ from Client import Client
32
+ import numpy as np
33
+ import torch
34
+ import socketio
35
+
36
+ # Configure logger
37
+ gunicorn_error_logger = logging.getLogger("gunicorn.error")
38
+ gunicorn_logger = logging.getLogger("gunicorn")
39
+ uvicorn_access_logger = logging.getLogger("uvicorn.access")
40
+
41
+ gunicorn_error_logger.propagate = True
42
+ gunicorn_logger.propagate = True
43
+ uvicorn_access_logger.propagate = True
44
+
45
+ uvicorn_access_logger.handlers = gunicorn_error_logger.handlers
46
+ fastapi_logger.handlers = gunicorn_error_logger.handlers
47
+
48
+ # sio is the main socket.io entrypoint
49
+ sio = socketio.AsyncServer(
50
+ async_mode="asgi",
51
+ cors_allowed_origins="*",
52
+ logger=gunicorn_logger,
53
+ engineio_logger=gunicorn_logger,
54
+ )
55
+ # sio.logger.setLevel(logging.DEBUG)
56
+ socketio_app = socketio.ASGIApp(sio)
57
+ # app.mount("/", socketio_app)
58
+
59
+ # config = dotenv_values(".env")
60
+
61
+ # Read connection string from environment vars
62
+ uri = os.environ['MONGODB_URI']
63
+
64
+ # Read connection string from .env file
65
+ # uri = config['MONGODB_URI']
66
+
67
+
68
+ # MongoDB Connection Lifespan Events
69
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Open the MongoDB connection for the app's lifetime and close it on exit.

    On startup a ``MongoClient`` is created from ``uri`` and stored on the app
    together with the ``IT-Cluster1`` database handle. A failed ping is only
    printed; startup continues.
    """
    # startup logic
    app.mongodb_client = MongoClient(uri)
    app.database = app.mongodb_client['IT-Cluster1']  # connect to interpretalk primary db
    try:
        app.mongodb_client.admin.command('ping')
        print("MongoDB Connection Established...")
    except Exception as e:
        print(e)

    try:
        yield
    finally:
        # shutdown logic: runs even when the application exits with an error,
        # so the client connection is never leaked.
        print("Closing MongoDB Connection...")
        app.mongodb_client.close()
85
+
86
+ app = FastAPI(lifespan=lifespan, logger=gunicorn_logger)
87
+
88
+ # New CORS functionality
89
+ app.add_middleware(
90
+ CORSMiddleware,
91
+ allow_origins=["*"], # configured node app port
92
+ allow_credentials=True,
93
+ allow_methods=["*"],
94
+ allow_headers=["*"],
95
+ )
96
+
97
+ app.include_router(api_router) # include routers for user, calls and transcripts operations
98
+
99
+ DEBUG = True
100
+
101
+ ESCAPE_HATCH_SERVER_LOCK_RELEASE_NAME = "remove_server_lock"
102
+
103
+ TARGET_SAMPLING_RATE = 16000
104
+ MAX_BYTES_BUFFER = 960_000
105
+
106
+ print("")
107
+ print("")
108
+ print("=" * 18 + " Interpretalk is starting... " + "=" * 18)
109
+
110
+ ###############################################
111
+ # Configure socketio server
112
+ ###############################################
113
+
114
+ # TODO PM - change this to the actual path
115
+ # seamless remnant code
116
+ CLIENT_BUILD_PATH = "../streaming-react-app/dist/"
117
+ static_files = {
118
+ "/": CLIENT_BUILD_PATH,
119
+ "/assets/seamless-db6a2555.svg": {
120
+ "filename": CLIENT_BUILD_PATH + "assets/seamless-db6a2555.svg",
121
+ "content_type": "image/svg+xml",
122
+ },
123
+ }
124
+ # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
125
+ device = torch.device("cpu")
126
+ processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
127
+
128
+ # PM - hardcoding temporarily as my GPU doesnt have enough vram
129
+ model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large").to(device)
130
+
131
+
132
+ bytes_data = bytearray()
133
+ model_name = "seamlessM4T_v2_large"
134
+ vocoder_name = "vocoder_v2" if model_name == "seamlessM4T_v2_large" else "vocoder_36langs"
135
+
136
+ clients = {}
137
+ rooms = {}
138
+
139
+
140
def get_collection_users():
    """Return the ``user_records`` collection of the app database."""
    users_collection = app.database["user_records"]
    return users_collection
142
+
143
def get_collection_calls():
    """Return the ``call_records`` collection of the app database."""
    calls_collection = app.database["call_records"]
    return calls_collection
145
+
146
+
147
@app.get("/", response_description="Welcome User")
def test():
    # Root endpoint: returns a fixed welcome payload.
    greeting = {"message": "Welcome to InterpreTalk!"}
    return greeting
150
+
151
+
152
async def send_translated_text(client_id, username, original_text, translated_text, room_id):
    """Emit a ``translated_text`` event to every socket in ``room_id``.

    All fields are stringified; the timestamp is the current local time.
    """
    # print(rooms) # Debugging
    # print(clients) # Debugging

    payload = dict(
        author_id=str(client_id),
        author_username=str(username),
        original_text=str(original_text),
        translated_text=str(translated_text),
        timestamp=str(datetime.now()),
    )
    gunicorn_logger.info("SENDING TRANSLATED TEXT TO CLIENT")
    await sio.emit("translated_text", payload, room=room_id)
    gunicorn_logger.info("SUCCESSFULLY SEND AUDIO TO FRONTEND")
166
+
167
+
168
@sio.on("connect")
async def connect(sid, environ):
    """Register a ``Client`` for a newly connected socket.

    The ``client_id`` is read from the connection query string and the
    username is looked up in the users collection.
    """
    print(f"📥 [event: connected] sid={sid}")
    params = dict(parse.parse_qsl(environ["QUERY_STRING"]))

    client_id = params.get("client_id")
    gunicorn_logger.info(f"📥 [event: connected] sid={sid}, client_id={client_id}")

    # get username to Client Object from DB
    username = find_name_from_id(get_collection_users(), client_id)

    # sid = socketid, client_id = client specific ID ,always the same for same user
    new_client = Client(sid, client_id, username)
    clients[sid] = new_client
    print(new_client.username)
    gunicorn_logger.warning(f"Client connected: {sid}")
    gunicorn_logger.warning(clients)
184
+
185
+
186
@sio.on("disconnect")
async def disconnect(sid):
    """Clean up after a socket disconnects and post-process its call.

    Removes the client from ``clients`` and from its room in ``rooms``, then
    runs key-term extraction and summarisation for the call. Failures in the
    post-processing are logged, not raised.
    """
    gunicorn_logger.debug(f"📤 [event: disconnected] sid={sid}")

    client = clients.pop(sid, None)
    if client is None:
        # The connect handler can fail before the client is registered;
        # there is nothing to clean up in that case.
        return

    call_id = client.call_id
    user_id = client.client_id
    tgt_lang = client.target_language

    # Free the room slot; otherwise a room that has seen two sids stays
    # "full" forever and reconnecting users can never re-enter it.
    members = rooms.get(call_id)
    if members is not None:
        if sid in members:
            members.remove(sid)
        if not members:
            rooms.pop(call_id, None)

    # Perform Key Term Extraction and summarisation
    try:
        # Get combined caption field for call record based on call_id
        term_extraction(get_collection_calls(), call_id, user_id, tgt_lang)

        # Perform summarisation based on target language
        summarise(get_collection_calls(), call_id, user_id, tgt_lang)
    except Exception:
        # Best-effort step: log with traceback instead of silently swallowing.
        gunicorn_logger.exception(f"📤 [event: term_extraction/summarisation request error] sid={sid}, call={call_id}")
206
+
207
+
208
@sio.on("target_language")
async def target_language(sid, target_lang):
    """Store the language selected by the client on its ``Client`` record."""
    gunicorn_logger.info(f"📥 [event: target_language] sid={sid}, target_lang={target_lang}")
    client = clients[sid]
    client.target_language = target_lang
212
+
213
+
214
@sio.on("call_user")
async def call_user(sid, call_id):
    """Put the caller into the call's room and create the call record.

    A room holds at most two sids. The call record is created with the
    caller's id, the call id and the current time.

    NOTE(review): the DB record is created even when the room is reported as
    full (or the sid is already in it) -- confirm that is intended.
    """
    import inspect  # local import: only needed for the awaitable check below

    clients[sid].call_id = call_id
    gunicorn_logger.info(f"CALL {sid}: entering room {call_id}")
    members = rooms.setdefault(call_id, [])
    if sid not in members and len(members) < 2:
        members.append(sid)
        # enter_room is a coroutine on AsyncServer in newer python-socketio
        # releases; await it when needed so the socket really joins the room.
        joined = sio.enter_room(sid, call_id)
        if inspect.isawaitable(joined):
            await joined
    else:
        gunicorn_logger.info(f"CALL {sid}: room {call_id} is full")
        # await sio.emit("room_full", room=call_id, to=sid)

    # BO - Get call id from dictionary created during socketio connection
    client_id = clients[sid].client_id

    gunicorn_logger.warning(f"NOW TRYING TO CREATE DB RECORD FOR Caller with ID: {client_id} for call: {call_id}")
    # BO -> Create Call Record with Caller and call_id field (None for callee, duration, terms..)
    request_data = {
        "call_id": str(call_id),
        "caller_id": str(client_id),
        "creation_date": str(datetime.now())
    }

    response = create_calls(get_collection_calls(), request_data)
    print(response)  # BO - print created db call record
239
+
240
+
241
@sio.on("audio_config")
async def audio_config(sid, sample_rate):
    """Record the client's original audio sample rate."""
    client = clients[sid]
    client.original_sr = sample_rate
244
+
245
+
246
@sio.on("answer_call")
async def answer_call(sid, call_id):
    """Put the callee into the call's room and record it on the call.

    A room holds at most two sids. The existing call record is updated with
    the callee's id.

    NOTE(review): the DB record is updated even when the room is reported as
    full (or the sid is already in it) -- confirm that is intended.
    """
    import inspect  # local import: only needed for the awaitable check below

    clients[sid].call_id = call_id
    gunicorn_logger.info(f"ANSWER {sid}: entering room {call_id}")
    members = rooms.setdefault(call_id, [])
    if sid not in members and len(members) < 2:
        members.append(sid)
        # enter_room is a coroutine on AsyncServer in newer python-socketio
        # releases; await it when needed so the socket really joins the room.
        joined = sio.enter_room(sid, call_id)
        if inspect.isawaitable(joined):
            await joined
    else:
        gunicorn_logger.info(f"ANSWER {sid}: room {call_id} is full")
        # await sio.emit("room_full", room=call_id, to=sid)

    # BO - Get call id from dictionary created during socketio connection
    client_id = clients[sid].client_id

    # BO -> Update Call Record with Callee field based on call_id
    gunicorn_logger.warning(f"NOW UPDATING MongoDB RECORD FOR Caller with ID: {client_id} for call: {call_id}")
    # BO -> Create Call Record with callee_id field (None for callee, duration, terms..)
    request_data = {
        "callee_id": client_id
    }

    response = update_calls(get_collection_calls(), call_id, request_data)
    print(response)  # BO - print created db call record
272
+
273
+
274
@sio.on("incoming_audio")
async def incoming_audio(sid, data, call_id):
    """Buffer incoming audio and, once enough is buffered, transcribe and translate it.

    When the client's buffer reaches ``MAX_BYTES_BUFFER`` it is resampled and
    checked for voice activity. If speech is detected, the audio is
    transcribed in the speaker's language, translated into the other
    participant's language when the two differ, then emitted to the room and
    stored as a caption. Any error is logged and swallowed so one bad chunk
    does not break the socket handler.

    NOTE(review): the model calls are synchronous and run inside this async
    handler -- consider off-loading them if event-loop latency matters.
    """
    try:
        client = clients[sid]
        client.add_bytes(data)

        if client.get_length() < MAX_BYTES_BUFFER:
            return

        gunicorn_logger.info('Buffer full, now outputting...')
        resampled_audio = client.resample_and_clear()
        vad_result = client.vad_analyse(resampled_audio)
        # source lang is speakers tgt language
        src_lang = client.target_language

        if not vad_result:
            return

        gunicorn_logger.info('Speech detected, now processing audio.....')
        # Look up the other participant without raising StopIteration when
        # the speaker is alone in the room.
        peer_sid = next((member for member in rooms.get(call_id, []) if member != sid), None)
        peer = clients.get(peer_sid) if peer_sid is not None else None
        if peer is None:
            gunicorn_logger.warning(f"incoming_audio: no other participant in room {call_id} for sid={sid}")
            return
        tgt_lang = peer.target_language

        # following example from https://github.com/facebookresearch/seamless_communication/blob/main/docs/m4t/README.md#transformers-usage
        output_tokens = processor(audios=resampled_audio, src_lang=src_lang, return_tensors="pt", sampling_rate=TARGET_SAMPLING_RATE).to(device)
        model_output = model.generate(**output_tokens, tgt_lang=src_lang, generate_speech=False)[0].tolist()[0]
        asr_text = processor.decode(model_output, skip_special_tokens=True)
        print(f"ASR TEXT = {asr_text}")
        # ASR TEXT => ORIGINAL TEXT

        if src_lang != tgt_lang:
            t2t_tokens = processor(text=asr_text, src_lang=src_lang, tgt_lang=tgt_lang, return_tensors="pt").to(device)
            translated_data = model.generate(**t2t_tokens, tgt_lang=tgt_lang, generate_speech=False)[0].tolist()[0]
            translated_text = processor.decode(translated_data, skip_special_tokens=True)
            print(f"TRANSLATED TEXT = {translated_text}")
        else:
            # PM - both users have same language selected, no need to translate
            translated_text = asr_text

        # PM - text_output is a list with 1 string
        await send_translated_text(client.client_id, client.username, asr_text, translated_text, call_id)

        # BO -> send translated_text to mongodb as caption record update based on call_id
        await send_captions(client.client_id, client.username, asr_text, translated_text, call_id)

    except Exception as e:
        # Bug fix: e.with_traceback() requires an argument and itself raised
        # TypeError inside this handler. logger.exception records the
        # traceback correctly.
        gunicorn_logger.exception(f"Error in incoming_audio: {e}")
315
+
316
+
317
async def send_captions(client_id, username, original_text, translated_text, call_id):
    """Append a caption entry to the call record identified by ``call_id``.

    Returns:
        Whatever ``update_captions`` returns for the update.
    """
    # BO -> Update Call Record with Callee field based on call_id
    print(f"Now updating Caption field in call record for Caller with ID: {client_id} for call: {call_id}")

    caption = dict(
        author_id=str(client_id),
        author_username=str(username),
        original_text=str(original_text),
        translated_text=str(translated_text),
        timestamp=str(datetime.now()),
    )

    return update_captions(get_collection_calls(), get_collection_users(), call_id, caption)
331
+
332
+
333
+ app.mount("/", socketio_app)
334
+
335
+
336
+ if __name__ == '__main__':
337
+ uvicorn.run("main:app", host='0.0.0.0', port=7860, log_level="info")
338
+
339
+
340
+ # Running in Docker Container
341
+ if __name__ != "__main__":
342
+ fastapi_logger.setLevel(gunicorn_logger.level)
343
+ else:
344
+ fastapi_logger.setLevel(logging.DEBUG)
backend/mongodb/endpoints/__pycache__/calls.cpython-310.pyc ADDED
Binary file (3.77 kB). View file
 
backend/mongodb/endpoints/__pycache__/users.cpython-310.pyc ADDED
Binary file (2.01 kB). View file
 
backend/mongodb/endpoints/calls.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Body, Request, status, HTTPException
2
+ from typing import List
3
+ from datetime import date
4
+
5
+ import sys
6
+
7
+ from ..operations import calls as calls
8
+ from ..models.calls import UserCaptions, UserCall, UpdateCall
9
+ from ..endpoints.users import get_collection_users
10
+
11
+ router = APIRouter(prefix="/call",
12
+ tags=["Calls"])
13
+
14
def get_collection_calls(request: Request):
    """Return the ``call_records`` collection for the app handling ``request``.

    Raises:
        HTTPException: 404 when the collection cannot be reached.
    """
    try:
        return request.app.database["call_records"]
    except Exception as exc:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are not converted into a 404; the original cause is chained.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Unable to find call records Database.") from exc
19
+
20
@router.post(
    "/create-call",
    response_description="Create a new user call record",
    status_code=status.HTTP_201_CREATED,
    response_model=UserCall,
)
async def create_calls(request: Request, user_calls: UserCall = Body(...)):
    # Delegates to operations.calls.create_calls with the call_records collection.
    return calls.create_calls(get_collection_calls(request), user_calls)
24
+
25
@router.get(
    "/find-call/{call_id}",
    # Fixed copy-pasted description: this route looks a call up by call id.
    response_description="Find a call based on Call ID",
    response_model=UserCall,
)
async def find_call(request: Request, call_id: str):
    # Delegates to operations.calls.find_call with the call_records collection.
    collection = get_collection_calls(request)
    return calls.find_call(collection, call_id)
29
+
30
@router.get(
    "/find-user-calls/{user_id}",
    response_description="Find user's calls based on User ID",
    response_model=List[UserCall],
)
async def find_user_calls(request: Request, user_id: str):
    # Delegates to operations.calls.find_user_calls with the call_records collection.
    return calls.find_user_calls(get_collection_calls(request), user_id)
34
+
35
@router.get(
    "/get-captions/{call_id}/{user_id}",
    # Fixed copy-pasted description: this route returns caption text.
    response_description="Get caption text for a call based on Call ID and User ID",
)
async def get_caption_text(request: Request, call_id: str, user_id: str):
    # Delegates to operations.calls.get_caption_text with the call_records collection.
    collection = get_collection_calls(request)
    return calls.get_caption_text(collection, call_id, user_id)
39
+
40
@router.get(
    "/find-duration/{min_len}/{max_len}",
    response_description="Find calls based on call duration in minutes",
    response_model=List[UserCall],
)
async def list_transcripts_by_duration(request: Request, min_len: int, max_len: int):
    # Delegates to operations.calls.list_transcripts_by_duration.
    return calls.list_transcripts_by_duration(get_collection_calls(request), min_len, max_len)
44
+
45
@router.put(
    "/update-call/{call_id}",
    response_description="Update an existing call",
    response_model=UpdateCall,
)
async def update_calls(request: Request, call_id: str, user_calls: UpdateCall = Body(...)):
    # Delegates to operations.calls.update_calls with the call_records collection.
    return calls.update_calls(get_collection_calls(request), call_id, user_calls)
49
+
50
@router.put(
    "/update-captions/{call_id}",
    # Fixed copy-pasted description: this route updates the captions of a call.
    response_description="Update the captions of an existing call",
    response_model=UpdateCall,
)
async def update_captions(request: Request, call_id: str, user_calls: UserCaptions = Body(...)):
    # Needs both collections; delegates to operations.calls.update_captions.
    call_collection = get_collection_calls(request)
    user_collection = get_collection_users(request)
    return calls.update_captions(call_collection, user_collection, call_id, user_calls)
55
+
56
@router.delete(
    "/delete-call/{call_id}",
    response_description="Delete a call by its id",
)
async def delete_call(request: Request, call_id: str):
    # Delegates to operations.calls.delete_calls with the call_records collection.
    return calls.delete_calls(get_collection_calls(request), call_id)
60
+
61
@router.get(
    "/fuzzy-search/{user_id}/{query}",
    response_description="Perform fuzzy text search on caption fields",
    response_model=List[UserCall],
)
async def fuzzy_search(request: Request, user_id: str, query: str):
    # Delegates to operations.calls.fuzzy_search with the call_records collection.
    return calls.fuzzy_search(get_collection_calls(request), user_id, query)
65
+
66
@router.get(
    "/summarise/{call_id}/{user_id}/{target_language}",
    response_description="Perform gpt-3.5 summarisation on call_id",
)
async def summarise(request: Request, call_id: str, user_id: str, target_language: str):
    # Delegates to operations.calls.summarise with the call_records collection.
    return calls.summarise(get_collection_calls(request), call_id, user_id, target_language)
70
+
71
@router.get(
    "/term-extraction/{call_id}/{user_id}/{target_language}",
    response_description="Perform key term extraction on call record",
)
async def term_extraction(request: Request, call_id: str, user_id: str, target_language: str):
    # Delegates to operations.calls.term_extraction with the call_records collection.
    return calls.term_extraction(get_collection_calls(request), call_id, user_id, target_language)
backend/mongodb/endpoints/users.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Body, Request, status, HTTPException
2
+ from typing import List
3
+ import sys
4
+ from ..models.users import User, UpdateUser
5
+ from ..operations import users as users
6
+
7
+ router = APIRouter(prefix="/user",
8
+ tags=["User"])
9
+
10
def get_collection_users(request: Request):
    """Return the ``user_records`` collection for the app handling ``request``."""
    return request.app.database["user_records"]
13
+
14
@router.post(
    "/",
    response_description="Create a new user",
    status_code=status.HTTP_201_CREATED,
    response_model=User,
)
async def create_user(request: Request, user: User = Body(...)):
    # Delegates to operations.users.create_user with the user_records collection.
    return users.create_user(get_collection_users(request), user)
18
+
19
@router.get(
    "/",
    response_description="List users",
    response_model=List[User],
)
async def list_users(request: Request):
    # Passes 100 as the second argument of operations.users.list_users.
    return users.list_users(get_collection_users(request), 100)
23
+
24
@router.put(
    "/{user_id}",
    response_description="Update a User",
    response_model=UpdateUser,
)
async def update_user(request: Request, user_id: str, user: UpdateUser = Body(...)):
    # Delegates to operations.users.update_user with the user_records collection.
    return users.update_user(get_collection_users(request), user_id, user)
28
+
29
@router.get(
    "/{user_id}",
    response_description="Get a single user by id",
    response_model=User,
)
async def find_user(request: Request, user_id: str):
    # Delegates to operations.users.find_user with the user_records collection.
    return users.find_user(get_collection_users(request), user_id)
33
+
34
@router.get(
    "/find-name-id/{user_id}",
    response_description="Get a username from user id",
)
async def find_name_from_id(request: Request, user_id: str):
    # Delegates to operations.users.find_name_from_id with the user_records collection.
    return users.find_name_from_id(get_collection_users(request), user_id)
38
+
39
@router.delete(
    "/{user_id}",
    response_description="Delete a user",
)
async def delete_user(request: Request, user_id: str):
    # Delegates to operations.users.delete_user with the user_records collection.
    return users.delete_user(get_collection_users(request), user_id)
43
+
backend/mongodb/models/__pycache__/calls.cpython-310.pyc ADDED
Binary file (3.01 kB). View file
 
backend/mongodb/models/__pycache__/users.cpython-310.pyc ADDED
Binary file (1.52 kB). View file
 
backend/mongodb/models/calls.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from typing import List, Dict, Optional
3
+ from datetime import datetime
4
+ from pydantic import BaseModel, Field, PrivateAttr
5
+ import sys
6
+
7
+
8
# Class for storing captions generated by SeamlessM4T
class UserCaptions(BaseModel):
    # Private attribute: not included in http calls.
    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4)
    # Who produced the caption (both optional).
    author_id: Optional[str] = None
    author_username: Optional[str] = None
    # Text in the speaker's language and its translation.
    original_text: str
    translated_text: str
    # Defaults to the time the model instance is created.
    timestamp: datetime = Field(default_factory=datetime.now)

    class Config:
        populate_by_name = True
        json_schema_extra = {
            "example": {
                "author_id": "gLZrfTwXyLUPB3eT7xT2HZnZiZT2",
                "author_username": "shamzino",
                "original_text": "eng: This is original_text english text",
                "translated_text": "spa: este es el texto traducido al español",
                "timestamp": "2024-03-28T16:15:50.956055",
            }
        }
29
+
30
+
31
# Class for storing past call records from users
class UserCall(BaseModel):
    # Private attribute, generated per instance.
    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4)
    call_id: Optional[str] = None
    caller_id: Optional[str] = None
    callee_id: Optional[str] = None
    # Exposed under the alias "date"; defaults to the creation time of the instance.
    creation_date: datetime = Field(default_factory=datetime.now, alias="date")
    captions: Optional[List[UserCaptions]] = None
    # Keyed by user id in the schema example below.
    key_terms: Optional[dict] = None
    summaries: Optional[dict] = None

    class Config:
        populate_by_name = True
        json_schema_extra = {
            "example": {
                "call_id": "65eef930e9abd3b1e3506906",
                "caller_id": "65ede65b6d246e52aaba9d4f",
                "callee_id": "65edda944340ac84c1f00758",
                "captions": [
                    {
                        "author_id": "gLZrfTwXyLUPB3eT7xT2HZnZiZT2",
                        "author_username": "shamzino",
                        "original_text": "eng: This is original_text english text",
                        "translated_text": "spa: este es el texto traducido al español",
                        "timestamp": "2024-03-28T16:15:50.956055",
                    },
                    {
                        "author_id": "g7pR1qCibzQf5mDP9dGtcoWeEc92",
                        "author_username": "benjino",
                        "original_text": "eng: This is source english text",
                        "translated_text": "spa: este es el texto fuente al español",
                        "timestamp": "2024-03-28T16:16:20.34625",
                    },
                ],
                "key_terms": {
                    "gLZrfTwXyLUPB3eT7xT2HZnZiZT2": ["original_text", "source", "english", "text"],
                    "g7pR1qCibzQf5mDP9dGtcoWeEc92": ["translated_text", "destination", "spanish", "text"],
                },
                "summaries": {
                    "gLZrfTwXyLUPB3eT7xT2HZnZiZT2": "This is a short test on lanuguage translation",
                    "65edda944340ac84c1f00758": "Esta es una breve prueba sobre traducción de idiomas.",
                },
            }
        }
56
+
57
+
58
# Class for updating User Call record
class UpdateCall(BaseModel):
    # Every field is optional so a partial update can name only what changes.
    call_id: Optional[str] = None
    caller_id: Optional[str] = None
    callee_id: Optional[str] = None
    captions: Optional[List[UserCaptions]] = None
    # NOTE(review): UserCall declares key_terms as a dict; confirm which
    # shape is intended before relying on this field.
    key_terms: Optional[List[str]] = None

    class Config:
        populate_by_name = True
        json_schema_extra = {
            # Fixed: the previous example used a "duration" field that this
            # model does not declare; show a field that can be updated.
            "example": {
                "callee_id": "65edda944340ac84c1f00758"
            }
        }
backend/mongodb/models/users.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from typing import List, Optional
3
+ from pydantic import BaseModel, Field, SecretStr, PrivateAttr
4
+ from pydantic.networks import EmailStr
5
+
6
+
7
# Class for user model used to relate users to past calls
class User(BaseModel):
    # Private attribute: not included in http calls.
    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4)
    user_id: str
    name: str
    # NOTE(review): unique/index are passed through to Field as extra
    # keyword arguments -- confirm something actually enforces them.
    email: EmailStr = Field(unique=True, index=True)

    class Config:
        populate_by_name = True
        json_schema_extra = {
            "example": {
                "user_id": "65ede65b6d246e52aaba9d4f",
                "name": "benjolo",
                "email": "benjolounchained@gmail.com",
            }
        }
23
+
24
# Class for updating user records
class UpdateUser(BaseModel):
    # Every field is optional so a partial update can name only what changes.
    user_id: Optional[str] = None
    name: Optional[str] = None
    email: Optional[EmailStr] = None

    class Config:
        populate_by_name = True
        json_schema_extra = {
            "example": {
                "email": "benjolounchained21@gmail.com",
            }
        }
37
+
backend/mongodb/operations/__pycache__/calls.cpython-310.pyc ADDED
Binary file (5.01 kB). View file
 
backend/mongodb/operations/__pycache__/users.cpython-310.pyc ADDED
Binary file (2.89 kB). View file
 
backend/mongodb/operations/calls.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Body, Request, HTTPException, status
2
+ from fastapi.encoders import jsonable_encoder
3
+ import sys
4
+ from ..models.calls import UpdateCall, UserCall, UserCaptions
5
+ from ..operations.users import *
6
+ from utils.text_rank import extract_terms
7
+ from openai import OpenAI
8
+
9
+ from time import sleep
10
+ import os
11
+ from dotenv import dotenv_values
12
+
13
+
14
def create_calls(collection, user: UserCall = Body(...)):
    """Insert a call record and return the document as stored.

    Per the original note, this is used to create the call record from
    main.py.

    Args:
        collection: Collection object exposing ``insert_one`` and ``find_one``.
        user: The call record to persist (the parameter is named ``user``
            but carries a ``UserCall``).

    Returns:
        The document read back from the collection by its inserted ``_id``.
    """
    payload = jsonable_encoder(user)
    inserted_id = collection.insert_one(payload).inserted_id
    return collection.find_one({"_id": inserted_id})
21
+
22
+
23
def find_call(collection, call_id: str):
    """Find a call record by its call id.

    Args:
        collection: Collection object exposing ``find_one``.
        call_id: Value matched against the ``call_id`` field.

    Returns:
        The matching call document.

    Raises:
        HTTPException: 404 when no document has this ``call_id``.
    """
    record = collection.find_one({"call_id": call_id})
    if record is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call with ID: '{call_id}' not found.")
    return record
30
+
31
+
32
def find_user_calls(collection, user_id: str):
    """Find every call in which the user took part.

    Args:
        collection: Collection object exposing ``find``.
        user_id: Matched against both ``caller_id`` and ``callee_id``.

    Returns:
        A list of matching call documents. The list is empty when the user
        has no calls (per the original note, the TranscriptView frontend
        component expects a list in that case).
    """
    # match on caller or callee ID
    participant_filter = {"$or": [{"caller_id": user_id}, {"callee_id": user_id}]}
    return list(collection.find(participant_filter))
39
+
40
+
41
def update_calls(collection, call_id: str, calls: UpdateCall = Body(...)):
    """Apply a partial update to a call record and return the stored document.

    Args:
        collection: Collection object exposing ``update_one`` and ``find_one``.
        call_id: Value matched against the ``call_id`` field.
        calls: Fields to change, either as an ``UpdateCall`` model or as an
            already-decoded mapping. Entries whose value is ``None`` are
            ignored.

    Returns:
        The call document as stored after the update.

    Raises:
        HTTPException: 404 when no call matches ``call_id``.
    """
    # A pydantic model has no ``.items()``; convert it to a mapping first.
    if hasattr(calls, "model_dump"):
        calls = calls.model_dump()
    changes = {k: v for k, v in calls.items() if v is not None}

    if changes:
        update_result = collection.update_one({"call_id": call_id}, {"$set": changes})

        # matched_count rather than modified_count: re-sending values that
        # are already stored matches the document without modifying it, and
        # that must not be reported as a missing call.
        if update_result.matched_count == 0:
            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    existing_item = collection.find_one({"call_id": call_id})
    if existing_item is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")
    return existing_item
55
+
56
+
57
def update_captions(call_collection, user_collection, call_id: str, captions: UserCaptions = Body(...)):
    """Append one caption to a call record and return the stored document.

    The author's display name is looked up from ``user_collection`` and
    stored on the caption as ``author_username``.

    Args:
        call_collection: Collection holding call records.
        user_collection: Collection holding user records.
        call_id: Value matched against the ``call_id`` field.
        captions: The caption to append, either as a ``UserCaptions`` model
            or as an already-decoded mapping. Entries whose value is
            ``None`` are ignored.

    Returns:
        The call document as stored after the caption was appended.

    Raises:
        HTTPException: 422 when the caption has no ``author_id``; 404 when
            the author or the call cannot be found.
    """
    # A pydantic model has no ``.items()``; convert it to a mapping first.
    if hasattr(captions, "model_dump"):
        captions = captions.model_dump()
    caption = {k: v for k, v in captions.items() if v is not None}

    # The author id is needed to resolve the user name; report a client
    # error instead of failing with a bare KeyError.
    author_id = caption.get("author_id")
    if author_id is None:
        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                            detail="Caption is missing 'author_id'!")

    # use user id to get user name and add it to the caption object
    caption["author_username"] = find_name_from_id(user_collection, author_id)

    update_result = call_collection.update_one({"call_id": call_id},
                                               {"$push": {"captions": caption}})

    # No matching call means there was nothing to push the caption onto.
    if update_result.matched_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not updated!")

    existing_item = call_collection.find_one({"call_id": call_id})
    if existing_item is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not found!")
    return existing_item
80
+
81
+
82
def delete_calls(collection, call_id: str):
    """Delete the call record with the given call id.

    Args:
        collection: Collection object exposing ``delete_one``.
        call_id: Value matched against the ``call_id`` field.

    Returns:
        A confirmation message when exactly one document was deleted.

    Raises:
        HTTPException: 404 when nothing was deleted.
    """
    outcome = collection.delete_one({"call_id": call_id})

    if outcome.deleted_count != 1:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call not found!")

    return f"Call deleted sucessfully!"
89
+
90
+
91
def get_caption_text(collection, call_id, user_id):
    """Join a call's captions into one text from the given user's viewpoint.

    Captions authored by ``user_id`` contribute their ``original_text``;
    captions by anyone else contribute their ``translated_text``.

    Args:
        collection: Collection holding call records.
        call_id: Id of the call whose captions are read.
        user_id: Id of the user the transcript is assembled for.

    Returns:
        The caption texts joined by single spaces, or ``None`` when the
        call record has no ``captions`` field.

    Raises:
        HTTPException: 404 (from ``find_call``) when the call does not exist.
    """
    record = find_call(collection, call_id)

    # Check if call has any captions first
    try:
        segments = record['captions']
    except KeyError:
        return None

    return " ".join(
        segment['original_text'] if segment['author_id'] == user_id else segment['translated_text']
        for segment in segments
    )
108
+
109
+
110
# approximate string matching
def fuzzy_search(collection, user_id, query):
    """Search call captions for ``query`` and return the user's matching calls.

    Args:
        collection: Collection holding call records.
        user_id: Only calls where this id is the caller or the callee are
            returned.
        query: Text to search for; passed to the ``$search`` stage with
            fuzzy matching enabled.

    Returns:
        A list of matching call documents in which the user took part.
    """
    # drop any existing indexes and create new one
    # NOTE(review): this drops *every* index on the collection on each
    # search, which is expensive and removes indexes created elsewhere.
    # Kept as-is because an index named 'captions' built from the old
    # (misspelled) key spec may still exist; consider creating the index
    # once at startup instead.
    collection.drop_indexes()
    # The second key previously read 'captions.tranlated_text', a field that
    # caption entries do not have, so translated text was never indexed.
    collection.create_index(
        [('captions.original_text', 'text'), ('captions.translated_text', 'text')],
        name='captions')

    pipeline = [
        {
            "$search": {
                "text": {
                    "query": query,
                    "path": {"wildcard": "*"},
                    "fuzzy": {}
                }
            }
        }
    ]

    # keep only the records in which the user took part
    return [
        doc for doc in collection.aggregate(pipeline)
        if doc['caller_id'] == user_id or doc['callee_id'] == user_id
    ]
141
+
142
+
143
def _strip_summary_prefix(summary):
    """Drop a leading label such as ``Summary:`` from a model response."""
    # Split on the first colon only, so later colons do not truncate the text.
    _, separator, remainder = summary.partition(":")
    return remainder.strip() if separator else summary


def summarise(collection, call_id, user_id, target_language):
    """Summarise a call transcript for one user and store the result.

    The transcript is assembled with ``get_caption_text``, sent to the
    ``gpt-3.5-turbo`` chat model, and the response is saved on the call
    record under ``summaries.<user_id>``.

    Args:
        collection: Collection holding call records.
        call_id: Id of the call to summarise.
        user_id: Id of the user the summary is written for.
        target_language: Language the summary is requested in.

    Returns:
        The summary with any leading ``label:`` prefix removed, or ``None``
        when the call has no caption text or the model request fails.

    Raises:
        HTTPException: 404 when the call record does not exist.
    """
    # Local import: the file only imports ``OpenAI`` from this package.
    from openai import OpenAIError

    # get caption text using call_id
    caption_text = get_caption_text(collection, call_id, user_id)
    if not caption_text:
        # Nothing to summarise; do not send an empty/None transcript.
        return None

    # Prefer the key from .env, falling back to the process environment.
    config = dotenv_values(".env")
    client = OpenAI(api_key=config.get("OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY"))

    prompt = (
        "The following is an extract from a call transcript. "
        f"Rewrite this as a structured, clear summary in {target_language}. "
        f"\n\nCall Transcript: \"\"\"\n{caption_text}\n\"\"\"\n"
    )

    # Original note: gpt-3.5-turbo has a 4096 token limit and the request
    # fails if it is exceeded. The request itself is inside the ``try`` so
    # that such a failure is actually caught.
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="gpt-3.5-turbo",
        )
        result = chat_completion.choices[0].message.content
    except (OpenAIError, IndexError):
        return None

    if result is None:
        return None

    # add result to mongodb
    update_result = collection.update_one({"call_id": call_id}, {"$set": {f"summaries.{user_id}": result}})

    # matched_count rather than modified_count: storing a summary identical
    # to the existing one matches the document without modifying it.
    if update_result.matched_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    # try parse summary and remove any leading summary prefixes
    return _strip_summary_prefix(result)
181
+
182
+
183
def term_extraction(collection, call_id, user_id, target_language):
    """Extract key terms from a call transcript and store them on the call.

    The terms are saved on the call record under ``key_terms.<user_id>``.

    Args:
        collection: Collection holding call records.
        call_id: Id of the call to analyse.
        user_id: Id of the user the transcript is assembled for.
        target_language: Language code passed on to ``extract_terms``.

    Returns:
        The list of extracted key terms, or an empty list when the call has
        no captions or the transcript is 50 characters or shorter.

    Raises:
        HTTPException: 404 when the call record does not exist.
    """
    combined_text = get_caption_text(collection, call_id, user_id)

    # Term extraction is poor on short transcripts, and get_caption_text
    # returns None when there are no captions; neither case has terms.
    if combined_text is None or len(combined_text) <= 50:
        return []

    # Extract Key Terms from Concatenated Caption Field
    key_terms = extract_terms(combined_text, target_language, len(combined_text))

    update_result = collection.update_one({"call_id": call_id}, {"$set": {f"key_terms.{user_id}": key_terms}})

    # matched_count rather than modified_count: re-extracting identical
    # terms matches the document without modifying it.
    if update_result.matched_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    return key_terms
backend/mongodb/operations/users.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Body, Request, HTTPException, status
2
+ from fastapi.encoders import jsonable_encoder
3
+ import sys
4
+ from ..models.users import User, UpdateUser
5
+ from bson import ObjectId
6
+ import re
7
+
8
+
9
def create_user(collection, user: User = Body(...)):
    """Insert a user record and return the document as stored.

    Args:
        collection: Collection object exposing ``insert_one`` and ``find_one``.
        user: The user record to persist.

    Returns:
        The document read back from the collection by its inserted ``_id``.
    """
    payload = jsonable_encoder(user)
    inserted_id = collection.insert_one(payload).inserted_id
    return collection.find_one({"_id": inserted_id})
14
+
15
+
16
def list_users(collection, limit: int):
    """Return up to ``limit`` user records.

    Args:
        collection: Collection object exposing ``find``.
        limit: Passed to ``find`` as its ``limit`` keyword.

    Returns:
        A list of user documents.

    Raises:
        HTTPException: 404 when reading from the collection fails.
    """
    try:
        return list(collection.find(limit=limit))
    except Exception as exc:
        # ``Exception`` instead of a bare ``except`` so KeyboardInterrupt and
        # SystemExit are not swallowed; the status code and message are kept
        # for existing callers and the real cause is chained for debugging.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No users found!") from exc
22
+
23
+
24
def find_user(collection, user_id: str):
    """Find a user record by user id.

    Args:
        collection: Collection object exposing ``find_one``.
        user_id: Value matched against the ``user_id`` field.

    Returns:
        The matching user document.

    Raises:
        HTTPException: 404 when no user has this ``user_id``.
    """
    user = collection.find_one({"user_id": user_id})
    if not user:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")
    return user
29
+
30
def find_name_from_id(collection, user_id: str):
    """Look up a user's name from their user id.

    Args:
        collection: Collection object exposing ``find_one``.
        user_id: Value matched against the ``user_id`` field.

    Returns:
        The ``name`` field of the matching user record.

    Raises:
        HTTPException: 404 when no user has this ``user_id``.
    """
    # find_one user record based on user id, projecting only the name field
    projected = collection.find_one({"user_id": user_id}, {"name": 1, "_id": 0})
    if not projected:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")
    # index name field from the single-field record returned
    return projected['name']
37
+
38
def find_user_name(collection, name: str):
    """Find a user record by name, ignoring letter case.

    Args:
        collection: Collection object exposing ``find_one``.
        name: Name to look for; it is escaped and matched as a whole string.

    Returns:
        The matching user document.

    Raises:
        HTTPException: 404 when no user has this name.
    """
    # whole-string, case-insensitive match on the escaped name
    name_pattern = re.compile(f"^{re.escape(name)}$", re.IGNORECASE)
    user = collection.find_one({"name": name_pattern})
    if not user:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with name {name} not found!")
    return user
44
+
45
+
46
def find_user_email(collection, email: str):
    """Find a user record by email address, ignoring letter case.

    Args:
        collection: Collection object exposing ``find_one``.
        email: Address to look for; it is escaped and matched as a whole
            string.

    Returns:
        The matching user document.

    Raises:
        HTTPException: 404 when no user has this email address.
    """
    # whole-string, case-insensitive match on the escaped address
    email_pattern = re.compile(f"^{re.escape(email)}$", re.IGNORECASE)
    user = collection.find_one({"email": email_pattern})
    if not user:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with Email Address {email} not found!")
    return user
51
+
52
+
53
def update_user(collection, user_id: str, user: UpdateUser):
    """Update a user record from an ``UpdateUser`` object and return it.

    Args:
        collection: Collection object exposing ``update_one`` and ``find_one``.
        user_id: Value matched against the ``user_id`` field.
        user: Fields to change; entries whose value is ``None`` are ignored.

    Returns:
        The user document as stored after the update.

    Raises:
        HTTPException: 404 when no user matches ``user_id`` or the
            collection cannot be read or written.
    """
    not_found_detail = f"User with user_id: '{user_id}' not found and updated!"
    changes = {k: v for k, v in user.model_dump().items() if v is not None}

    try:
        # matched_count rather than modified_count: re-sending values that
        # are already stored matches the document without modifying it, and
        # that must not be reported as a missing user.
        if changes and collection.update_one({"user_id": user_id}, {"$set": changes}).matched_count == 0:
            existing_user = None
        else:
            existing_user = collection.find_one({"user_id": user_id})
    except Exception as exc:
        # Status code and message kept for existing callers; the real cause
        # is chained instead of being discarded by a bare ``except``.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=not_found_detail) from exc

    # Previously a missing user with an empty update fell through and
    # returned None; report it as not found instead.
    if existing_user is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=not_found_detail)
    return existing_user
67
+
68
+
69
def delete_user(collection, user_id: str):
    """Delete the user record with the given user id.

    Args:
        collection: Collection object exposing ``delete_one``.
        user_id: Value matched against the ``user_id`` field.

    Returns:
        A confirmation message when exactly one document was deleted.

    Raises:
        HTTPException: 404 when no user was deleted or the delete failed.
    """
    try:
        deleted_user = collection.delete_one({"user_id": user_id})
    except Exception as exc:
        # Status code and message kept for existing callers; the real cause
        # is chained instead of being discarded by a bare ``except``.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!") from exc

    # A delete that matches nothing does not raise, so the missing-user case
    # has to be detected from the count (previously it returned None).
    if deleted_user.deleted_count != 1:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"User with user_id {user_id} not found!")

    return f"User with user_id {user_id} deleted sucessfully"
backend/requirements.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ colorlog==6.8.2
2
+ contextlib2==21.6.0
3
+ fastapi==0.110.1
4
+ g2p_en==2.1.0
5
+ matplotlib==3.7.0
6
+ numpy==1.24.2
7
+ openai==1.20.0
8
+ protobuf==5.26.1
9
+ pydantic==2.7.0
10
+ pydub==0.25.1
11
+ pymongo==4.6.2
12
+ PySoundFile==0.9.0.post1
13
+ python-dotenv==1.0.1
14
+ python-socketio==5.9.0
15
+ pymongo==4.6.2
16
+ Requests==2.31.0
17
+ sentencepiece==0.1.99
18
+ simuleval==1.1.4
19
+ soundfile==0.12.1
20
+ spacy==3.7.4
21
+ pytextrank==3.3.0
22
+ torch==2.1.2
23
+ torchaudio==2.1.2
24
+ #transformers==4.20.1
25
+ uvicorn==0.29.0
26
+ vad==1.0.2
27
+ hf_transfer==0.1.4
28
+ huggingface_hub==0.19.4
backend/routes/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from.routing import router
backend/routes/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (235 Bytes). View file
 
backend/routes/__pycache__/routing.cpython-310.pyc ADDED
Binary file (375 Bytes). View file
 
backend/routes/routing.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ import sys
3
+ from mongodb.endpoints import users, calls
4
+
5
+ router = APIRouter()
6
+ router.include_router(calls.router)
7
+ router.include_router(users.router)
backend/tests/.pytest_cache/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Created by pytest automatically.
2
+ *
backend/tests/.pytest_cache/CACHEDIR.TAG ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Signature: 8a477f597d28d172789f06886806bc55
2
+ # This file is a cache directory tag created by pytest.
3
+ # For information about cache directory tags, see:
4
+ # https://bford.info/cachedir/spec.html
backend/tests/.pytest_cache/README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # pytest cache directory #
2
+
3
+ This directory contains data from the pytest's cache plugin,
4
+ which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
5
+
6
+ **Do not** commit this to version control.
7
+
8
+ See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
backend/tests/.pytest_cache/v/cache/lastfailed ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_client.py": true,
3
+ "unit_test.py::test_create_calls_success": true,
4
+ "unit_test.py::test_create_calls_failure": true,
5
+ "test_main.py::test_connect": true,
6
+ "test_main.py::test_disconnect": true,
7
+ "test_main.py::test_target_language": true,
8
+ "test_main.py::test_incoming_audio": true,
9
+ "test_main.py": true,
10
+ "unit_test.py::TestClient": true
11
+ }
backend/tests/.pytest_cache/v/cache/nodeids ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "integration_test.py::test_extracion_pass2",
3
+ "integration_test.py::test_extraction_fail",
4
+ "integration_test.py::test_extraction_pass",
5
+ "integration_test.py::test_search_fail",
6
+ "integration_test.py::test_search_pass",
7
+ "integration_test.py::test_search_pass2",
8
+ "integration_test.py::test_summary_fail",
9
+ "integration_test.py::test_summary_fail2",
10
+ "integration_test.py::test_summary_pass",
11
+ "integration_test.py::test_summary_pass2",
12
+ "test_client.py::test_client_add_bytes",
13
+ "test_client.py::test_client_init",
14
+ "test_client.py::test_client_resample_and_clear",
15
+ "test_client.py::test_client_vad",
16
+ "test_main.py::test_connect",
17
+ "test_main.py::test_disconnect",
18
+ "test_main.py::test_incoming_audio",
19
+ "test_main.py::test_target_language",
20
+ "unit_test.py::test_create_call_pass",
21
+ "unit_test.py::test_create_calls_failure",
22
+ "unit_test.py::test_create_calls_success",
23
+ "unit_test.py::test_create_user_pass",
24
+ "unit_test.py::test_delete_user_fail",
25
+ "unit_test.py::test_delete_user_pass",
26
+ "unit_test.py::test_find_call_fail",
27
+ "unit_test.py::test_find_call_pass",
28
+ "unit_test.py::test_find_name_id_fail",
29
+ "unit_test.py::test_find_name_id_pass",
30
+ "unit_test.py::test_find_user_call_fail",
31
+ "unit_test.py::test_find_user_call_pass",
32
+ "unit_test.py::test_find_user_fail",
33
+ "unit_test.py::test_find_user_pass",
34
+ "unit_test.py::test_get_captions_fail",
35
+ "unit_test.py::test_get_captions_pass",
36
+ "unit_test.py::test_root_pass",
37
+ "unit_test.py::test_update_call_fail",
38
+ "unit_test.py::test_update_call_pass",
39
+ "unit_test.py::test_update_caption_pass",
40
+ "unit_test.py::test_update_user_fail",
41
+ "unit_test.py::test_update_user_pass"
42
+ ]
backend/tests/.pytest_cache/v/cache/stepwise ADDED
@@ -0,0 +1 @@
 
 
1
+ []
backend/tests/__init__.py ADDED
File without changes
backend/tests/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (196 Bytes). View file
 
backend/tests/__pycache__/integration_test.cpython-310-pytest-8.1.1.pyc ADDED
Binary file (3.98 kB). View file
 
backend/tests/__pycache__/test_client.cpython-310-pytest-8.1.1.pyc ADDED
Binary file (6.95 kB). View file
 
backend/tests/__pycache__/test_main.cpython-310-pytest-8.1.1.pyc ADDED
Binary file (3.92 kB). View file
 
backend/tests/__pycache__/test_main.cpython-310.pyc ADDED
Binary file (2.2 kB). View file
 
backend/tests/__pycache__/unit_test.cpython-310-pytest-8.1.1.pyc ADDED
Binary file (6.16 kB). View file
 
backend/tests/integration_test.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import dotenv_values
3
+ from fastapi import FastAPI
4
+ from pymongo import MongoClient
5
+ from main import requests
6
+ import uuid
7
+ import pytest
8
+ from dotenv import load_dotenv
9
+ import requests
10
+ import json
11
+
12
+ # Test Fuzzy Search Integrated component on existing call records
13
+ def test_search_pass():
14
+
15
+ # Test against records with mention of 'Football'
16
+ response = requests.get("http://127.0.0.1:8080/call/fuzzy-search/ozpHhyum3sayTdxIKUAtF51uvWJ2/football")
17
+
18
+ assert response.status_code == 200
19
+ assert len(response.json()) == 3 # three matching call transcripts
20
+
21
+ # Test Fuzzy Search Integrated component on existing call records
22
+ def test_search_pass2():
23
+
24
+ # Test against records with mention of 'Football' mispelled as 'Footbll'
25
+ response = requests.get("http://127.0.0.1:8080/call/fuzzy-search/ozpHhyum3sayTdxIKUAtF51uvWJ2/footbll")
26
+
27
+ assert response.status_code == 200
28
+ assert len(response.json()) == 3 # still three matching call transcripts
29
+
30
+ # Test Fuzzy Search Integrated component on existing call records
31
+ def test_search_fail():
32
+
33
+ # Test against records with mention of 'Football
34
+ response = requests.get("http://127.0.0.1:8080/call/fuzzy-search/ozpHhyum3sayTdxIKUAtF51uvWJ2/basketball")
35
+
36
+ assert response.status_code == 200
37
+ assert len(response.json()) == 0 # no matching call transcripts
38
+
39
+
40
+
41
+
42
+ # Test Summarisation Integrated component on existing call records
43
+ def test_summary_pass():
44
+
45
+ # Test with summarisation of english version transcript
46
+ response = requests.get("http://127.0.0.1:8080//call/summarise/FCnORXmLkw48G5mgscBV/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng")
47
+
48
+ assert response.status_code == 200
49
+
50
+ def test_summary_pass2():
51
+
52
+ # Test with summarisation of polish version transcript
53
+ response = requests.get("http://127.0.0.1:8080//call/summarise/FCnORXmLkw48G5mgscBV/fNGMkWoSK7fxwE3tbp8E816sthd2/pol")
54
+
55
+ assert response.status_code == 200
56
+
57
+ def test_summary_fail():
58
+
59
+ # Test with summarisation of english version transcript
60
+ response = requests.get("http://127.0.0.1:8080//call/summarise/falseID/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng") # non exising call record
61
+
62
+ assert response.status_code == 404
63
+
64
+ def test_summary_fail2():
65
+
66
+ # Test with summarisation of english version transcript
67
+ response = requests.get("http://127.0.0.1:8080//call/summarise/FCnORXmLkw48G5mgscBV/falseID/eng") # non exising user record
68
+
69
+ assert response.status_code == 404
70
+
71
+
72
+ # Test Key Term Extraction Integrated component on existing call records
73
+ def test_extraction_pass():
74
+
75
+ # Test against records with mention of 'Football'
76
+ response = requests.get("http://127.0.0.1:8080//call/term-extraction/FCnORXmLkw48G5mgscBV/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng")
77
+
78
+ assert response.status_code == 200
79
+ assert len(response.json()) == 3 # still three matching call transcripts
80
+
81
+
82
+ # Test Key Term Extraction Integrated component on existing call records
83
+ def test_extracion_pass2():
84
+
85
+ # Test against records with mention of 'Football' mispelled as 'Footbll'
86
+ response = requests.get("http://127.0.0.1:8080//call/term-extraction/FCnORXmLkw48G5mgscBV/fNGMkWoSK7fxwE3tbp8E816sthd2/pol")
87
+
88
+ assert response.status_code == 200
89
+ assert len(response.json()) == 3 # still three matching call transcripts
90
+
91
+ # Test Key Term Extraction Integrated component on existing call records
92
+ def test_extraction_fail():
93
+
94
+ # Test against records with mention of 'Football
95
+ response = requests.get("http://127.0.0.1:8080//call/term-extraction/FCnORXmLkw48G5mgscBV/ozpHhyum3sayTdxIKUAtF51uvWJ2/eng")
96
+
97
+ assert response.status_code == 200
98
+ assert len(response.json()) == 0 # no matching call transcripts
backend/tests/silence.wav ADDED
Binary file (302 kB). View file
 
backend/tests/speaking.wav ADDED
Binary file (255 kB). View file
 
backend/tests/test_client.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import wave
3
+ import pytest
4
+ import torchaudio
5
+ import os
6
+ import sys
7
+
8
+ current_dir = os.path.dirname(os.path.abspath(__file__))
9
+ parent_dir = os.path.dirname(current_dir)
10
+ sys.path.append(parent_dir)
11
+ from Client import Client
12
+
13
+
14
+ @pytest.fixture
15
+ def mock_client():
16
+ client = Client("test_sid", "test_client_id", "testusername", original_sr=44100)
17
+ return client
18
+
19
+ def test_client_init(mock_client):
20
+ assert mock_client.sid == "test_sid"
21
+ assert mock_client.client_id == "test_client_id"
22
+ assert mock_client.call_id == None
23
+ assert mock_client.buffer == bytearray()
24
+ assert mock_client.output_path == "test_sid_output_audio.wav"
25
+ assert mock_client.target_language == None
26
+ assert mock_client.original_sr == 44100
27
+ assert mock_client.vad.sample_rate == 16000
28
+ assert mock_client.vad.frame_length == 25
29
+ assert mock_client.vad.frame_shift == 20
30
+ assert mock_client.vad.energy_threshold == 0.05
31
+ assert mock_client.vad.pre_emphasis == 0.95
32
+
33
+ def test_client_add_bytes(mock_client):
34
+ mock_client.add_bytes(b"test")
35
+ assert mock_client.buffer == b"test"
36
+
37
def test_client_resample_and_clear(mock_client):
    """Resampling the buffered audio yields 16 kHz output and empties the buffer."""
    location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

    # Close the reader once the frames are loaded.
    with wave.open(os.path.join(location, "speaking.wav"), "rb") as speaking:
        speaking_bytes = speaking.readframes(-1)
    mock_client.add_bytes(speaking_bytes)
    resampled_waveform = mock_client.resample_and_clear()

    # Join with a separator so the file lands inside the tests directory
    # rather than next to it as "<tests-dir>testoutput.wav".
    output_path = os.path.join(location, "testoutput.wav")
    torchaudio.save(output_path, resampled_waveform, 16000)
    with wave.open(output_path, "rb") as wf:
        sample_rate = wf.getframerate()

    assert mock_client.buffer == bytearray()
    assert sample_rate == 16000
47
+
48
def test_client_vad(mock_client):
    """Voice activity detection reports speech for speaking.wav and none for silence.wav."""
    location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

    # Close each reader once its frames are loaded.
    with wave.open(location + "/speaking.wav", "rb") as speaking:
        speaking_bytes = speaking.readframes(-1)
    mock_client.add_bytes(speaking_bytes)
    resampled_waveform = mock_client.resample_and_clear()
    assert mock_client.buffer == bytearray()
    assert mock_client.vad_analyse(resampled_waveform) == True

    with wave.open(location + "/silence.wav", "rb") as silence:
        silent_bytes = silence.readframes(-1)
    mock_client.add_bytes(silent_bytes)
    resampled_waveform = mock_client.resample_and_clear()
    assert mock_client.buffer == bytearray()
    assert mock_client.vad_analyse(resampled_waveform) == False
backend/tests/test_main.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import dotenv_values
2
+ from fastapi import FastAPI
3
+ from pymongo import MongoClient
4
+ import pytest
5
+ from unittest.mock import AsyncMock, MagicMock, ANY
6
+ import socketio
7
+
8
+ import os
9
+ import sys
10
+
11
+ current_dir = os.path.dirname(os.path.abspath(__file__))
12
+ parent_dir = os.path.dirname(current_dir)
13
+ sys.path.append(parent_dir)
14
+
15
+ from Client import Client
16
+ from main import sio, connect, disconnect, target_language, call_user, answer_call, incoming_audio, clients, rooms, app
17
+ from unittest.mock import patch
18
+
19
+ sio = socketio.AsyncServer(
20
+ async_mode="asgi",
21
+ cors_allowed_origins="*",
22
+ # engineio_logger=logger,
23
+ )
24
+
25
+ config = dotenv_values(".env")
26
+
27
+ # Read connection string from environment vars
28
+ # uri = os.environ['MONGODB_URI']
29
+
30
+ # Read connection string from .env file
31
+ uri = config['MONGODB_URI']
32
+ app.mongodb_client = MongoClient(uri)
33
+ app.database = app.mongodb_client['IT-Cluster1'] #connect to interpretalk primary db
34
+ try:
35
+ app.mongodb_client.admin.command('ping')
36
+ print("MongoDB Connection Established...")
37
+ except Exception as e:
38
+ print(e)
39
+ # shutdown logic
40
+ print("Closing MongoDB Connection...")
41
+
42
+ @pytest.fixture(autouse=True)
43
+ def setup_clients_and_rooms():
44
+ global clients, rooms
45
+ clients.clear()
46
+ rooms.clear()
47
+ yield
48
+
49
+ @pytest.fixture
50
+ def mock_client():
51
+ client = Client("test_sid", "test_client_id", "testusername", original_sr=44100)
52
+ return client
53
+
54
+
55
+ @pytest.mark.asyncio
56
+ async def test_connect(mock_client):
57
+ sid = mock_client.sid
58
+ environ = {'QUERY_STRING': 'client_id=test_client_id'}
59
+ await connect(sid, environ)
60
+ app.mongodb_client.close()
61
+ assert sid in clients
62
+
63
+ @pytest.mark.asyncio
64
+ async def test_disconnect(mock_client):
65
+ sid = mock_client.sid
66
+ clients[sid] = mock_client
67
+ await disconnect(sid)
68
+ assert sid not in clients
69
+
70
+ @pytest.mark.asyncio
71
+ async def test_target_language(mock_client):
72
+ sid = mock_client.sid
73
+ clients[sid] = mock_client
74
+ target_lang = "fr"
75
+ await target_language(sid, target_lang)
76
+ assert clients[sid].target_language == "fr"
77
+
78
+ @pytest.mark.asyncio
79
+ async def test_incoming_audio(mock_client):
80
+ sid = mock_client.sid
81
+ clients[sid] = mock_client
82
+ data = b"\x01"
83
+ call_id = "1234"
84
+ await incoming_audio(sid, data, call_id)
85
+ assert clients[sid].get_length() != 0
86
+
backend/tests/unit_test.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import dotenv_values
3
+ from fastapi import FastAPI
4
+ from pymongo import MongoClient
5
+ from main import requests
6
+ import uuid
7
+ import pytest
8
+ from dotenv import load_dotenv
9
+ import requests
10
+ import json
11
+
12
+
13
+ # Test Root endpoint
14
+ def test_root_pass():
15
+ response = requests.get("http://127.0.0.1:8080/")
16
+
17
+ assert response.status_code == 200
18
+ assert response.json() == {"message": "Welcome to InterpreTalk!"}
19
+
20
+
21
+ # POST /user/
22
+ # Test DB user record creation including response validation
23
+ def test_create_user_pass():
24
+ payload = {
25
+ "name": "Tester1",
26
+ "user_id": "testerID",
27
+ "email": "tester1@gmail.com"
28
+ }
29
+
30
+ response = requests.post("http://127.0.0.1:8080/user/", json=payload)
31
+ assert response.status_code == 201
32
+
33
+
34
+ '''Test User Endpoints'''
35
+
36
+ # GET /user/
37
+ # Test finding DB user record based on user ID
38
+ def test_find_user_pass():
39
+ response = requests.get("http://localhost:8080/user/ozpHhyum3sayTdxIKUAtF51uvWJ2") # existing user record
40
+
41
+ assert response.status_code == 200
42
+ assert response.json() == {
43
+ "user_id": "ozpHhyum3sayTdxIKUAtF51uvWJ2",
44
+ "name": "Benjamin",
45
+ "email": "benjolounchained@gmail.com"
46
+ }
47
+
48
+
49
+ def test_find_user_fail():
50
+ response = requests.get(f"http://127.0.0.1:8080/users/fakeID") # non-existing user record
51
+
52
+ # check if response is inteded error code
53
+ assert response.status_code == 404
54
+
55
+
56
+ # PUT /user/{user_id}
57
+ # Updating DB user record based on user ID
58
+ def test_update_user_pass():
59
+ payload = {
60
+ "name": "TesterNewName"
61
+ }
62
+ response = requests.patch(f"http://127.0.0.1:8080/users/testerID", json=payload)
63
+
64
+ assert response.status_code == 202
65
+ assert response.json() == {
66
+ "name": "TesterNewName",
67
+ "user_id": "testerID",
68
+ "email": "tester1@gmail.com"
69
+ }
70
+
71
+ # Test with non-existing user ID
72
+ def test_update_user_fail():
73
+ payload = {
74
+ "name": "TesterNewName"
75
+ }
76
+
77
+ response = requests.patch(f"http://127.0.0.1:8080/users/falseID", json=payload)
78
+ assert response.status_code == 404
79
+
80
+
81
+ # DELETE /user/{user_id}
82
+ def test_delete_user_pass():
83
+ response = requests.delete(f"http://127.0.0.1:8080/users/testerID")
84
+ assert response.status_code == 200
85
+
86
+ def test_delete_user_fail():
87
+ response = requests.delete(f"http://127.0.0.1:8080/users/fakeID")
88
+ assert response.status_code == 404
89
+
90
+
91
+ # GET /user/find-name-id/{user_id}
92
def test_find_name_id_pass():
    """Looking up an existing user id returns that user's name."""
    response = requests.get("http://127.0.0.1:8080/user/find-name-id/ozpHhyum3sayTdxIKUAtF51uvWJ2")
    assert response.status_code == 201
    # ``json`` is a method: without the call the bound method itself was
    # compared with the dict, which can never be equal.
    assert response.json() == {
        'name': "Benjamin"
    }
98
+
99
+ def test_find_name_id_fail():
100
+ response = requests.get("http://127.0.0.1:8080/user/find-name-id/falseID")
101
+ assert response.status_code == 404
102
+
103
+
104
+
105
+ '''Test Call endpoints'''
106
+
107
+ # POST /call/create-call
108
+ # Test creating call record
109
+ def test_create_call_pass():
110
+ payload = {
111
+ "call_id": "test001",
112
+ "caller_id": "tester01",
113
+ "callee_id": "tester02",
114
+ "captions": [
115
+ {
116
+ "author_id": "tester01",
117
+ "author_username": "tester",
118
+ "original_text": "It is a test",
119
+ "translated_text": "Es un prueba",
120
+ }
121
+ ]
122
+ }
123
+
124
+ response = requests.post("http://127.0.0.1:8080/call/create-call", json=payload)
125
+ assert response.status_code == 201
126
+
127
+
128
+ # GET /call/find-call
129
+ # Test finding DB call record based on call ID
130
+ def test_find_call_pass():
131
+ response = requests.get(f"http://127.0.0.1:8080/call/test001") # existing user record
132
+
133
+ assert response.status_code == 200
134
+ assert response.json() == {
135
+ "call_id": "test001",
136
+ "caller_id": "tester01",
137
+ "callee_id": "tester02",
138
+ "captions": [
139
+ {
140
+ "author_id": "tester01",
141
+ "author_username": "tester",
142
+ "original_text": "It is a test",
143
+ "translated_text": "Es un prueba",
144
+ }
145
+ ]
146
+ }
147
+
148
+
149
+ def test_find_call_fail():
150
+ response = requests.get(f"http://127.0.0.1:8080/call/fakeID") # non-existing user record
151
+
152
+ # check if response is inteded error code
153
+ assert response.status_code == 404
154
+
155
+
156
+ # GET /call/find-user-call
157
+ # Test finding DB call record based on user ID
158
+ def test_find_user_call_pass():
159
+ response = requests.get(f"http://127.0.0.1:8080/call/find-user-calls/tester01") # existing user record
160
+
161
+ assert response.status_code == 200
162
+ assert response.json() == {
163
+ "call_id": "test001",
164
+ "caller_id": "tester01",
165
+ "callee_id": "tester02",
166
+ "captions": [
167
+ {
168
+ "author_id": "tester01",
169
+ "author_username": "tester",
170
+ "original_text": "It is a test",
171
+ "translated_text": "Es un prueba",
172
+ }
173
+ ]
174
+ }
175
+
176
+ def test_find_user_call_fail():
177
+ response = requests.get(f"http://127.0.0.1:8080/calls/fakeID") # non-existing user record
178
+
179
+ # check if response is inteded error code
180
+ assert response.status_code == 404
181
+
182
+
183
+ # GET /call/get-captions
184
+ # Test finding DB call record based on user ID
185
+ def test_get_captions_pass():
186
+ response = requests.get(f"http://127.0.0.1:8080/call/find-user-calls/test001/tester01") # existing user record
187
+
188
+ assert response.status_code == 200
189
+ assert response.json() == {
190
+ "call_id": "test001",
191
+ "caller_id": "tester01",
192
+ "callee_id": "tester02",
193
+ "captions": [
194
+ {
195
+ "author_id": "tester01",
196
+ "author_username": "tester",
197
+ "original_text": "It is a test",
198
+ "translated_text": "Es un prueba",
199
+ }
200
+ ]
201
+ }
202
+
203
+ def test_get_captions_fail():
204
+ response = requests.get(f"http://127.0.0.1:8080/call/find-user-calls/test001/tester00") # fake user record
205
+
206
+ # check if response is inteded error code
207
+ assert response.status_code == 404
208
+
209
def test_get_captions_fail2():
    """Requesting captions for a non-existing call record returns 404."""
    # Renamed from a second ``test_get_captions_fail``: two functions with
    # the same name in one module means only the last definition is
    # collected, so the fake-user variant was silently never run.
    response = requests.get(f"http://127.0.0.1:8080/call/find-user-calls/test000/tester01") # fake call record

    # check that the response is the intended error code
    assert response.status_code == 404
214
+
215
+
216
+ # PATCH /call/update-call/{call_id}
217
+ # test updating call record based on id
218
+ def test_update_call_pass():
219
+ payload = {
220
+ "callee_id": "TesterNewName"
221
+ }
222
+ response = requests.patch(f"http://127.0.0.1:8080/call/update-call/tester02", json=payload)
223
+
224
+ assert response.status_code == 202
225
+ assert response.json() == {
226
+ "call_id": "test001",
227
+ "caller_id": "tester01",
228
+ "callee_id": "tester02",
229
+ "captions": [
230
+ {
231
+ "author_id": "tester01",
232
+ "author_username": "tester",
233
+ "original_text": "It is a test",
234
+ "translated_text": "Es un prueba",
235
+ }
236
+ ]
237
+ }
238
+
239
+ # Test with non-existing user ID
240
+ def test_update_call_fail():
241
+ payload = {
242
+ "callee_id": "testName"
243
+ }
244
+
245
+ response = requests.patch(f"http://127.0.0.1:8080/users/falseID", json=payload)
246
+ assert response.status_code == 404
247
+
248
+
249
+ # PATCH /call/update-caption/{call_id}
250
+ # test updating caption record based on id
251
+ def test_update_caption_pass():
252
+ payload = {
253
+ "author_username": "testerNew"
254
+ }
255
+ response = requests.patch(f"http://127.0.0.1:8080/call/update-caption/tester01", json=payload)
256
+
257
+ assert response.status_code == 202
258
+
259
+
260
# Test caption update with a non-existing call ID
def test_update_caption_fail():
    """Updating captions under an unknown id returns 404."""
    # Renamed from a second ``test_update_call_fail``: the duplicate name
    # replaced the real update-call failure test, which then never ran.
    payload = {
        "callee_id": "testName"
    }

    # NOTE(review): this URL has no "/call" prefix, unlike the passing
    # caption test, so the 404 may come from routing rather than from the
    # handler - confirm the intended path.
    response = requests.patch(f"http://127.0.0.1:8080/update-caption/falseID", json=payload)
    assert response.status_code == 404
268
+
269
+
270
+ # DELETE /call/delete-call/{call_id}
271
def test_delete_call_pass():
    """Deleting an existing call record returns 200."""
    # Renamed from a second ``test_delete_user_pass``: the duplicate name
    # replaced the user-deletion test, which then never ran. The doubled
    # slash before "call" is also removed from the URL.
    response = requests.delete("http://127.0.0.1:8080/call/delete-call/test001")
    assert response.status_code == 200
274
+
275
def test_delete_call_fail():
    """Deleting a non-existing call record returns 404."""
    # Renamed from a second ``test_delete_user_fail``: the duplicate name
    # replaced the user-deletion failure test, which then never ran. The
    # doubled slash is removed so the 404 comes from the handler rather
    # than from an unmatched route.
    response = requests.delete("http://127.0.0.1:8080/call/delete-call/test009")
    assert response.status_code == 404
backend/utils/__pycache__/text_rank.cpython-310.pyc ADDED
Binary file (2.03 kB). View file
 
backend/utils/text_rank.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+ import pytextrank
3
+ from spacy.tokens import Span
4
+
5
# Register a scrubber factory that reduces each phrase to its lemma
# (per the original note, used to convert words to their singular form)
@spacy.registry.misc("plural_scrubber")
def plural_scrubber():
    """Return a function that maps a spaCy ``Span`` to its ``lemma_`` text."""
    def lemmatise(span: Span) -> str:
        return span.lemma_

    return lemmatise
11
+
12
+
13
def model_selector(target_language: str):
    """Load the spaCy model for a language and add a TextRank component.

    Args:
        target_language: Three-letter language code. Codes not listed below
            fall back to the English model ``en_core_web_lg``.

    Returns:
        The loaded spaCy pipeline with a ``textrank`` component added.
    """
    # Subset of non-English models
    language_model = {
        "spa": "es_core_news_sm",
        "fra": "fr_core_news_sm",
        "pol": "pl_core_news_sm",
        "deu": "de_core_news_sm",
        "ita": "it_core_news_sm",
        "por": "pt_core_news_sm",
        "nld": "nl_core_news_sm",
        "fin": "fi_core_news_sm",
        "ron": "ro_core_news_sm",
        "rus": "ru_core_news_sm"
    }

    # Resolve the model name first, then load it. Previously the load sat
    # inside the ``try``, so a KeyError raised while loading a listed model
    # would have been mistaken for an unknown language code.
    model_name = language_model.get(target_language, "en_core_web_lg")
    nlp = spacy.load(model_name)

    # Add TextRank component to pipeline with stopwords; every stop word of
    # the loaded language is mapped to the "NOUN" tag, and phrases are
    # cleaned with the registered "plural_scrubber".
    nlp.add_pipe("textrank", config={
        "stopwords": {token: ["NOUN"] for token in nlp.Defaults.stop_words},
        "scrubber": {"@misc": "plural_scrubber"}})

    return nlp
42
+
43
+
44
def extract_terms(text, target_language, length):
    """Extract the top-ranked key phrases from ``text``.

    Args:
        text: Text to analyse.
        target_language: Language code used to select the spaCy model.
        length: Size of the text as supplied by the caller; it decides how
            many phrases are returned: fewer than 100 gives one phrase,
            100 to 300 gives two, more than 300 gives three.

    Returns:
        A list of unique phrase texts in rank order, at most three entries.
    """
    nlp = model_selector(target_language)

    # Run the pipeline; the textrank component exposes ranked phrases on
    # ``doc._.phrases``.
    doc = nlp(text)

    # The original chain used strict comparisons on both sides, so a length
    # of exactly 100 or 300 matched no branch and ``phrases`` was unbound.
    if length < 100:
        limit = 1
    elif length <= 300:
        limit = 2
    else:
        limit = 3

    # dict.fromkeys removes duplicates while keeping rank order (a set
    # would return the phrases in arbitrary order).
    return list(dict.fromkeys(phrase.text for phrase in doc._.phrases[:limit]))