benjolo's picture
Uploading completed backend
ddc5bbd verified
raw
history blame
No virus
6.62 kB
from fastapi import Body, Request, HTTPException, status
from fastapi.encoders import jsonable_encoder
import sys
from ..models.calls import UpdateCall, UserCall, UserCaptions
from ..operations.users import *
from utils.text_rank import extract_terms
from openai import OpenAI
from time import sleep
import os
from dotenv import dotenv_values
# Used within calls to create call record in main.py
def create_calls(collection, user: UserCall = Body(...)):
    """Insert a new call record and return the document as stored.

    Args:
        collection: Collection object exposing ``insert_one`` / ``find_one``.
        user: Call payload; it is passed through ``jsonable_encoder`` before
            being inserted.

    Returns:
        The document looked up by the ``_id`` assigned on insert.
    """
    payload = jsonable_encoder(user)
    inserted_id = collection.insert_one(payload).inserted_id
    # Read the record back so the caller receives the generated "_id" as well
    return collection.find_one({"_id": inserted_id})
def find_call(collection, call_id: str):
    """Find a call based on its call id.

    Args:
        collection: Collection object exposing ``find_one``.
        call_id: Value matched against the ``call_id`` field.

    Returns:
        The matching call document.

    Raises:
        HTTPException: 404 when no document has the given ``call_id``.
    """
    record = collection.find_one({"call_id": call_id})
    if record is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call with ID: '{call_id}' not found.")
    return record
def find_user_calls(collection, user_id: str):
    """Find every call in which the user took part, based on user id.

    Args:
        collection: Collection object exposing ``find``.
        user_id: ID matched against both ``caller_id`` and ``callee_id``.

    Returns:
        A list of matching call documents; an empty list when there are
        none (the TranscriptView frontend component expects a list).
    """
    # Match on caller or callee ID
    either_party = {"$or": [{"caller_id": user_id}, {"callee_id": user_id}]}
    return list(collection.find(either_party))
def update_calls(collection, call_id: str, calls: UpdateCall = Body(...)):
    """Apply a partial update to the call identified by ``call_id``.

    Fields whose value is ``None`` are dropped so that they do not overwrite
    stored values. When nothing remains to update, the current document is
    simply looked up and returned.

    Args:
        collection: Collection object exposing ``update_one`` / ``find_one``.
        call_id: Value matched against the ``call_id`` field.
        calls: Mapping of field names to new values (must support
            ``.items()``).

    Returns:
        The call document as stored after the update.

    Raises:
        HTTPException: 404 when no call with ``call_id`` exists.
    """
    changes = {field: value for field, value in calls.items() if value is not None}

    if changes:
        update_result = collection.update_one({"call_id": call_id}, {"$set": changes})
        # Use matched_count rather than modified_count: an update that sets
        # fields to the values they already hold matches the document but
        # modifies nothing, and must not be reported as a missing call.
        if update_result.matched_count == 0:
            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    if (existing_item := collection.find_one({"call_id": call_id})) is not None:
        return existing_item

    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")
def update_captions(call_collection, user_collection, call_id: str, captions: UserCaptions = Body(...)):
    """Append one caption segment, tagged with its author's name, to a call.

    Args:
        call_collection: Collection holding call documents.
        user_collection: Collection passed to ``find_name_from_id`` to resolve
            the author's name.
        call_id: Value matched against the ``call_id`` field.
        captions: Mapping describing the caption segment; must contain
            ``author_id`` (must support ``.items()``).

    Returns:
        The call document as stored after the caption was pushed.

    Raises:
        HTTPException: 404 when the push modified no document, or when the
            call cannot be read back afterwards.
    """
    segment = {key: value for key, value in captions.items() if value is not None}

    # Look up the author's name from the ID carried in the caption and store
    # it alongside the caption itself.
    segment["author_username"] = find_name_from_id(user_collection, segment["author_id"])

    push_result = call_collection.update_one(
        {"call_id": call_id},
        {"$push": {"captions": segment}},
    )
    if push_result.modified_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Captions not updated!")

    stored_call = call_collection.find_one({"call_id": call_id})
    if stored_call is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Captions not found!")
    return stored_call
def delete_calls(collection, call_id: str):
    """Delete the call identified by ``call_id``.

    Args:
        collection: Collection object exposing ``delete_one``.
        call_id: Value matched against the ``call_id`` field.

    Returns:
        A confirmation message when exactly one document was deleted.

    Raises:
        HTTPException: 404 when the delete did not remove exactly one document.
    """
    outcome = collection.delete_one({"call_id": call_id})
    if outcome.deleted_count != 1:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Call not found!")
    return f"Call deleted sucessfully!"
def get_caption_text(collection, call_id, user_id):
    """Build the transcript of a call from one user's point of view.

    Segments authored by ``user_id`` contribute their ``original_text``;
    every other segment contributes its ``translated_text``.

    Args:
        collection: Collection passed to ``find_call``.
        call_id: ID of the call whose captions are read.
        user_id: ID compared against each segment's ``author_id``.

    Returns:
        The selected texts joined by single spaces, or ``None`` when the call
        document has no ``captions`` key.
    """
    call_record = find_call(collection, call_id)

    # A call that never received captions has no "captions" key at all
    try:
        segments = call_record['captions']
    except KeyError:
        return None

    return " ".join(
        segment['original_text'] if segment['author_id'] == user_id else segment['translated_text']
        for segment in segments
    )
# approximate string matching
def fuzzy_search(collection, user_id, query):
    """Run a fuzzy text search over calls and keep those involving the user.

    Args:
        collection: Collection object exposing ``drop_indexes``,
            ``create_index`` and ``aggregate``.
        user_id: Only documents whose ``caller_id`` or ``callee_id`` equals
            this value are returned.
        query: Text to search for.

    Returns:
        A list of matching call documents in which the user is either the
        caller or the callee.
    """
    # Drop any existing indexes and create a new text index on both caption
    # text fields.
    # NOTE(review): rebuilding every index on each search is expensive and
    # also removes indexes unrelated to captions; kept to preserve existing
    # behaviour - consider creating this index once at startup instead.
    collection.drop_indexes()
    collection.create_index(
        # Field name must match the "translated_text" key stored on captions
        [('captions.original_text', 'text'), ('captions.translated_text', 'text')],
        name='captions',
    )

    # NOTE(review): "$search" looks like the Atlas Search aggregation stage -
    # confirm the deployment supports it.
    pipeline = [
        {
            "$search": {
                "text": {
                    "query": query,
                    "path": {"wildcard": "*"},
                    "fuzzy": {}
                }
            }
        }
    ]

    # Keep only the records that belong to this user
    return [
        doc
        for doc in collection.aggregate(pipeline)
        if doc['caller_id'] == user_id or doc['callee_id'] == user_id
    ]
def summarise(collection, call_id, user_id, target_language):
    """Summarise a call transcript with the OpenAI chat API and store it.

    The transcript is built with ``get_caption_text`` and sent to
    ``gpt-3.5-turbo``; the raw reply is saved on the call document under
    ``summaries.<user_id>``.

    Args:
        collection: Collection holding call documents.
        call_id: ID of the call to summarise.
        user_id: ID of the user the summary is generated for.
        target_language: Language the model is asked to write the summary in.

    Returns:
        The summary with any leading "<label>:" prefix removed, or ``None``
        when the call has no caption text or the model request fails.

    Raises:
        HTTPException: 404 when no call document matched the update.
        KeyError: when ``OPENAI_API_KEY`` is missing from ``.env``.
    """
    # Local import: only this function needs the exception type
    from openai import OpenAIError

    # Get caption text using call_id
    caption_text = get_caption_text(collection, call_id, user_id)
    if not caption_text:
        # Nothing to summarise - avoid sending the literal text "None" to the model
        return None

    config = dotenv_values(".env")
    client = OpenAI(api_key=config["OPENAI_API_KEY"])

    prompt = (
        "The following is an extract from a call transcript. "
        f"Rewrite this as a structured, clear summary in {target_language}.\n\n"
        f'Call Transcript: """\n{caption_text}\n"""\n'
    )

    # Gpt-3.5 turbo has 4096 token limit -> request will fail if exceeded, so
    # the request itself (not only the result access) sits inside the try.
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="gpt-3.5-turbo",
        )
        result = chat_completion.choices[0].message.content
    except (OpenAIError, IndexError):
        return None

    if not result:
        return None

    # Add result to mongodb. matched_count is checked rather than
    # modified_count so that re-storing an identical summary is not reported
    # as a missing call.
    update_result = collection.update_one({"call_id": call_id}, {"$set": {f"summaries.{user_id}": result}})
    if update_result.matched_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    # Try to remove a leading summary prefix: keep everything after the first
    # colon so that colons inside the summary body do not truncate it.
    _, separator, remainder = result.partition(":")
    return remainder.strip() if separator else result
def term_extraction(collection, call_id, user_id, target_language, min_caption_length=50):
    """Extract key terms from a call transcript and store them on the call.

    Args:
        collection: Collection holding call documents.
        call_id: ID of the call to process.
        user_id: ID of the user the terms are extracted for; results are
            stored under ``key_terms.<user_id>``.
        target_language: Language passed through to ``extract_terms``.
        min_caption_length: Transcripts with at most this many characters are
            skipped, because term extraction is poor on short transcripts.

    Returns:
        The value returned by ``extract_terms``, or ``None`` when the call has
        no captions or the transcript is too short.

    Raises:
        HTTPException: 404 when no call document matched the update.
    """
    combined_text = get_caption_text(collection, call_id, user_id)

    # get_caption_text returns None for calls without captions; treat that the
    # same as a transcript that is too short to be worth processing.
    if combined_text is None or len(combined_text) <= min_caption_length:
        return None

    # Extract Key Terms from Concatenated Caption Field
    key_terms = extract_terms(combined_text, target_language, len(combined_text))

    # matched_count is checked rather than modified_count so that re-storing
    # identical terms is not reported as a missing call.
    update_result = collection.update_one({"call_id": call_id}, {"$set": {f"key_terms.{user_id}": key_terms}})
    if update_result.matched_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    return key_terms