Spaces:
Paused
Paused
File size: 6,623 Bytes
ddc5bbd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
from fastapi import Body, Request, HTTPException, status
from fastapi.encoders import jsonable_encoder
import sys
from ..models.calls import UpdateCall, UserCall, UserCaptions
from ..operations.users import *
from utils.text_rank import extract_terms
from openai import OpenAI
from time import sleep
import os
from dotenv import dotenv_values
# Used within calls to create call record in main.py
def create_calls(collection, user: UserCall = Body(...)):
    """Insert a new call record and return the document as stored.

    The payload is converted with ``jsonable_encoder``, inserted into
    ``collection``, and then read back by the ``_id`` the insert produced.
    """
    payload = jsonable_encoder(user)
    inserted_id = collection.insert_one(payload).inserted_id
    return collection.find_one({"_id": inserted_id})
def find_call(collection, call_id: str):
    """Find a call based on its call id.

    Returns the matching document. Raises a 404 ``HTTPException`` when no
    document in ``collection`` has the given ``call_id``.
    """
    match = collection.find_one({"call_id": call_id})
    if match is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Call with ID: '{call_id}' not found.",
        )
    return match
def find_user_calls(collection, user_id: str):
    """Find all calls for a user id, matching on caller or callee.

    Always returns a list; it is empty when the user has no existing calls,
    which is what the TranscriptView frontend component relies on.
    """
    participant_filter = {"$or": [{"caller_id": user_id}, {"callee_id": user_id}]}
    return list(collection.find(participant_filter))
def update_calls(collection, call_id: str, calls: UpdateCall = Body(...)):
    """Apply a partial update to a call and return the stored document.

    Fields whose value is ``None`` are ignored. When at least one field
    remains, it is written with ``$set``; the call is then read back and
    returned. With no fields to write, the current document is returned
    unchanged.

    Args:
        collection: Collection holding the call documents.
        call_id: Value of the ``call_id`` field identifying the call.
        calls: Mapping of field names to new values.

    Returns:
        The call document as stored after the update.

    Raises:
        HTTPException: 404 when no call has the given ``call_id``.
    """
    changes = {field: value for field, value in calls.items() if value is not None}
    if changes:
        update_result = collection.update_one({"call_id": call_id}, {"$set": changes})
        # Check matched_count rather than modified_count: re-sending values
        # that are already stored matches the call but modifies nothing, and
        # that must not be reported as a failure.
        if update_result.matched_count == 0:
            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    existing_item = collection.find_one({"call_id": call_id})
    if existing_item is not None:
        return existing_item
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")
def update_captions(call_collection, user_collection, call_id: str, captions: UserCaptions = Body(...)):
    """Append a caption segment to a call and return the updated call.

    ``None``-valued fields are dropped from the caption, and the name looked
    up from its ``author_id`` is stored with it as ``author_username``.

    Raises:
        HTTPException: 404 when the push modifies no document, or when the
            call cannot be read back afterwards.
    """
    segment = {key: value for key, value in captions.items() if value is not None}
    # Use the caption's author id to fetch the user name and attach it.
    segment["author_username"] = find_name_from_id(user_collection, segment["author_id"])

    # No emptiness check is needed: the segment holds at least
    # "author_username" by this point.
    call_filter = {"call_id": call_id}
    push_result = call_collection.update_one(call_filter, {"$push": {"captions": segment}})
    if push_result.modified_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not updated!")

    updated_call = call_collection.find_one(call_filter)
    if updated_call is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not found!")
    return updated_call
def delete_calls(collection, call_id: str):
    """Delete the call with the given call id.

    Returns a confirmation message when exactly one document was removed;
    raises a 404 ``HTTPException`` otherwise.
    """
    outcome = collection.delete_one({"call_id": call_id})
    if outcome.deleted_count != 1:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")
    return "Call deleted sucessfully!"
def get_caption_text(collection, call_id, user_id):
    """Join a call's captions into one string from a user's point of view.

    Segments authored by ``user_id`` contribute their ``original_text``;
    every other segment contributes its ``translated_text``. Segments are
    joined with single spaces. Returns ``None`` when the call record has no
    ``captions`` field.
    """
    call_record = find_call(collection, call_id)
    try:  # the call may not have any captions yet
        segments = call_record["captions"]
    except KeyError:
        return None

    return " ".join(
        segment["original_text"] if segment["author_id"] == user_id else segment["translated_text"]
        for segment in segments
    )
# approximate string matching
def fuzzy_search(collection, user_id, query):
    """Fuzzy-search the calls collection and return the user's matching calls.

    Rebuilds the ``captions`` text index, runs a ``$search`` aggregation with
    fuzzy text matching over all paths, and keeps only the documents in which
    ``user_id`` is the caller or the callee.

    Args:
        collection: Collection holding the call documents.
        user_id: Only calls with this id as ``caller_id`` or ``callee_id``
            are returned.
        query: Text to search for.

    Returns:
        A list of matching call documents (empty when nothing matches).
    """
    # drop any existing indexes and create new one
    # NOTE(review): drop_indexes() removes every index on the collection, on
    # every search. Creating the index once at startup looks preferable;
    # confirm nothing depends on the rebuild before changing it.
    collection.drop_indexes()
    # The second field is "translated_text", the key caption segments are
    # read with elsewhere in this file; the previous spelling
    # "tranlated_text" indexed a field that does not exist.
    collection.create_index(
        [("captions.original_text", "text"), ("captions.translated_text", "text")],
        name="captions",
    )

    pipeline = [
        {
            "$search": {
                "text": {
                    "query": query,
                    "path": {"wildcard": "*"},
                    "fuzzy": {},
                }
            }
        }
    ]

    # Keep only the records the user took part in.
    return [
        doc
        for doc in collection.aggregate(pipeline)
        if doc["caller_id"] == user_id or doc["callee_id"] == user_id
    ]
def summarise(collection, call_id, user_id, target_language):
    """Summarise a call transcript with OpenAI and store the summary on the call.

    The caption text for ``user_id`` is sent to ``gpt-3.5-turbo`` with a
    request for a structured summary in ``target_language``. The full model
    output is saved under ``summaries.<user_id>`` on the call document.

    Args:
        collection: Collection holding the call documents.
        call_id: Value of the ``call_id`` field identifying the call.
        user_id: User whose view of the captions is summarised; also the key
            the summary is stored under.
        target_language: Language the summary should be written in.

    Returns:
        The summary with a leading ``"<label>:"`` prefix removed when one is
        present, or ``None`` when the call has no caption text or the
        completion request fails.

    Raises:
        HTTPException: 404 when the call does not exist (raised while reading
            the captions or while storing the summary).
    """
    # Imported here because the module only imports the client class.
    from openai import OpenAIError

    # get caption text using call_id; with nothing to summarise, stop before
    # the literal text "None" is sent to the model.
    caption_text = get_caption_text(collection, call_id, user_id)
    if not caption_text:
        return None

    # Prefer the key from .env, falling back to the process environment.
    config = dotenv_values(".env")
    api_key = config.get("OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
    client = OpenAI(api_key=api_key)

    prompt = (
        "The following is an extract from a call transcript. "
        f"Rewrite this as a structured, clear summary in {target_language}.\n\n"
        f'Call Transcript: """\n{caption_text}\n"""\n'
    )

    # Gpt-3.5 turbo has 4096 token limit -> request will fail if exceeded.
    # The request itself has to sit inside the try for that failure to be
    # turned into a None result.
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="gpt-3.5-turbo",
        )
        result = chat_completion.choices[0].message.content
    except (OpenAIError, IndexError):
        return None
    if result is None:
        return None

    # Store the raw result on the call document. matched_count (not
    # modified_count) so that storing an identical summary is not an error.
    update_result = collection.update_one(
        {"call_id": call_id}, {"$set": {f"summaries.{user_id}": result}}
    )
    if update_result.matched_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

    # try parse summary and remove any leading summary prefixes. Split on the
    # first colon only, so later colons do not cut the summary short.
    _, separator, remainder = result.partition(":")
    remainder = remainder.strip()
    return remainder if separator and remainder else result
def term_extraction(collection, call_id, user_id, target_language):
    """Extract key terms from a call's captions and store them on the call.

    The caption text for ``user_id`` is passed to ``extract_terms`` and the
    result is saved under ``key_terms.<user_id>`` on the call document.

    Args:
        collection: Collection holding the call documents.
        call_id: Value of the ``call_id`` field identifying the call.
        user_id: User whose view of the captions is analysed; also the key
            the terms are stored under.
        target_language: Language passed through to ``extract_terms``.

    Returns:
        The extracted key terms, or ``None`` when the call has no captions or
        the combined caption text is 50 characters or shorter.

    Raises:
        HTTPException: 404 when the call does not exist (raised while reading
            the captions or while storing the terms).
    """
    min_caption_length = 50  # poor term extraction on short transcripts

    combined_text = get_caption_text(collection, call_id, user_id)
    # get_caption_text returns None for a call without captions; guard before
    # len() so that case yields None instead of a TypeError.
    if combined_text is None or len(combined_text) <= min_caption_length:
        return None

    # Extract Key Terms from Concatenated Caption Field
    key_terms = extract_terms(combined_text, target_language, len(combined_text))

    # matched_count (not modified_count): re-running extraction on unchanged
    # captions stores identical terms, which modifies nothing but is not an
    # error.
    update_result = collection.update_one(
        {"call_id": call_id}, {"$set": {f"key_terms.{user_id}": key_terms}}
    )
    if update_result.matched_count == 0:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")
    return key_terms
|