Spaces:

benjolo
/

InterpreTalk

Paused

App Files Files Community

InterpreTalk / backend /mongodb /operations /calls.py

benjolo

Uploading completed backend

ddc5bbd verified 5 months ago

raw

history blame

No virus

6.62 kB

	from fastapi import Body, Request, HTTPException, status
	from fastapi.encoders import jsonable_encoder
	import sys
	from ..models.calls import UpdateCall, UserCall, UserCaptions
	from ..operations.users import *
	from utils.text_rank import extract_terms
	from openai import OpenAI

	from time import sleep
	import os
	from dotenv import dotenv_values


	def create_calls(collection, user: UserCall = Body(...)):
	    """Insert a new call record and return it as stored.

	    Used by the call-creation route in main.py. ``user`` is serialised with
	    ``jsonable_encoder``, inserted into ``collection``, and the freshly
	    inserted document is read back by its generated ``_id``.
	    """
	    payload = jsonable_encoder(user)
	    inserted_id = collection.insert_one(payload).inserted_id
	    return collection.find_one({"_id": inserted_id})


	def find_call(collection, call_id: str):
	    """Find a single call document by its ``call_id`` field.

	    Returns the matching document. Raises a 404 ``HTTPException`` when
	    ``collection`` holds no document with that call ID.
	    """
	    record = collection.find_one({"call_id": call_id})
	    if record is None:
	        raise HTTPException(
	            status_code=status.HTTP_404_NOT_FOUND,
	            detail=f"Call with ID: '{call_id}' not found.",
	        )
	    return record


	def find_user_calls(collection, user_id: str):
	    """Return every call in which ``user_id`` is the caller or the callee.

	    The result is always a list; it is empty when the user has no calls, so
	    the TranscriptView frontend component always receives a list.
	    """
	    participant_filter = {"$or": [{"caller_id": user_id}, {"callee_id": user_id}]}
	    return list(collection.find(participant_filter))


	def update_calls(collection, call_id: str, calls: UpdateCall = Body(...)):
	    """Apply a partial update to the call identified by ``call_id``.

	    Args:
	        collection: collection holding the call documents.
	        call_id: value of the ``call_id`` field to match.
	        calls: field/value pairs to set. Annotated as ``UpdateCall`` but must
	            support ``.items()``; entries whose value is ``None`` are ignored.

	    Returns:
	        The call document as stored after the update.

	    Raises:
	        HTTPException: 404 when no document has this ``call_id``.
	    """
	    changes = {k: v for k, v in calls.items() if v is not None}

	    if changes:
	        update_result = collection.update_one({"call_id": call_id}, {"$set": changes})

	        # Check matched_count rather than modified_count: modified_count is
	        # also 0 when the call exists but the submitted values equal the
	        # stored ones, which is not an error.
	        if update_result.matched_count == 0:
	            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

	    if (existing_item := collection.find_one({"call_id": call_id})) is not None:
	        return existing_item

	    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")


	def update_captions(call_collection, user_collection, call_id: str, captions: UserCaptions = Body(...)):
	    """Append one caption entry to the ``captions`` array of a call.

	    ``None``-valued fields are dropped from ``captions``, the author's user
	    name (looked up from ``author_id`` via ``find_name_from_id``) is added as
	    ``author_username``, and the entry is pushed onto the call document.

	    Returns the call document after the push. Raises a 404 ``HTTPException``
	    when the push modified nothing or the call cannot be read back.
	    """
	    entry = {field: value for field, value in captions.items() if value is not None}

	    # Resolve the author's user name from the ID carried in the caption.
	    entry["author_username"] = find_name_from_id(user_collection, entry["author_id"])

	    # entry always contains at least author_username at this point, so the
	    # push is performed unconditionally.
	    push_result = call_collection.update_one(
	        {"call_id": call_id},
	        {"$push": {"captions": entry}},
	    )
	    if push_result.modified_count == 0:
	        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not updated!")

	    stored_call = call_collection.find_one({"call_id": call_id})
	    if stored_call is None:
	        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Captions not found!")
	    return stored_call


	def delete_calls(collection, call_id: str):
	    """Delete the call whose ``call_id`` field equals ``call_id``.

	    Returns a confirmation message when exactly one document was deleted;
	    otherwise raises a 404 ``HTTPException``.
	    """
	    outcome = collection.delete_one({"call_id": call_id})
	    if outcome.deleted_count != 1:
	        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not found!")
	    return "Call deleted sucessfully!"


	def get_caption_text(collection, call_id, user_id):
	    """Build one transcript string for a call from ``user_id``'s point of view.

	    Captions authored by ``user_id`` contribute their ``original_text``;
	    every other caption contributes its ``translated_text``. The pieces are
	    joined with single spaces in stored order.

	    Returns ``None`` when the call document has no ``captions`` key.
	    ``find_call`` raises a 404 ``HTTPException`` for an unknown ``call_id``.
	    """
	    call_record = find_call(collection, call_id)

	    # No 'captions' key means the call has no captions yet.
	    if "captions" not in call_record:
	        return None

	    return " ".join(
	        segment["original_text"] if segment["author_id"] == user_id else segment["translated_text"]
	        for segment in call_record["captions"]
	    )


	def fuzzy_search(collection, user_id, query):
	    """Approximate (fuzzy) text search over calls, limited to one user.

	    Args:
	        collection: collection holding the call documents.
	        user_id: only calls where this ID is the caller or callee are returned.
	        query: free-text search string.

	    Returns:
	        A list of matching call documents involving ``user_id``; empty when
	        nothing matches.
	    """
	    # Rebuild the text index over both caption text fields.
	    # NOTE(review): drop_indexes() removes every non-_id index on the
	    # collection on each search; consider creating this index once at
	    # startup instead.
	    collection.drop_indexes()
	    collection.create_index(
	        [('captions.original_text', 'text'), ('captions.translated_text', 'text')],
	        name='captions',
	    )

	    # NOTE(review): $search is an Atlas Search stage; confirm the deployment
	    # has an Atlas Search index, as the text index above is a separate
	    # mechanism.
	    pipeline = [
	        {
	            "$search": {
	                "text": {
	                    "query": query,
	                    "path": {"wildcard": "*"},
	                    "fuzzy": {},
	                }
	            }
	        }
	    ]

	    # Keep only the calls in which the user took part.
	    return [
	        doc
	        for doc in collection.aggregate(pipeline)
	        if doc['caller_id'] == user_id or doc['callee_id'] == user_id
	    ]


	def _strip_summary_prefix(summary):
	    """Drop a leading ``label:`` prefix, keeping everything after the first colon.

	    Text without a colon is returned unchanged.
	    """
	    _label, separator, remainder = summary.partition(":")
	    return remainder.strip() if separator else summary


	def summarise(collection, call_id, user_id, target_language):
	    """Summarise a call transcript with the OpenAI chat API and store the result.

	    Args:
	        collection: collection holding the call documents.
	        call_id: ID of the call to summarise.
	        user_id: user whose view of the transcript is summarised; the summary
	            is stored under ``summaries.<user_id>`` on the call document.
	        target_language: language the summary should be written in.

	    Returns:
	        The summary with any leading ``label:`` prefix removed, or ``None``
	        when the call has no caption text or the OpenAI request fails.

	    Raises:
	        HTTPException: 404 when the call does not exist.
	    """
	    # Local import: the module only imports the OpenAI client class.
	    from openai import OpenAIError

	    caption_text = get_caption_text(collection, call_id, user_id)
	    if not caption_text:
	        # Nothing to summarise; avoid sending an empty/"None" transcript.
	        return None

	    # Prefer the key from .env, falling back to the process environment.
	    config = dotenv_values(".env")
	    api_key = config.get("OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
	    client = OpenAI(api_key=api_key)

	    prompt = (
	        "The following is an extract from a call transcript. "
	        f"Rewrite this as a structured, clear summary in {target_language}. "
	        f'\n\nCall Transcript: """\n{caption_text}\n"""\n'
	    )

	    # gpt-3.5-turbo has a 4096 token limit; the request itself fails when it
	    # is exceeded, so the API call must sit inside the try block.
	    try:
	        chat_completion = client.chat.completions.create(
	            messages=[{"role": "user", "content": prompt}],
	            model="gpt-3.5-turbo",
	        )
	        result = chat_completion.choices[0].message.content
	    except (OpenAIError, IndexError, AttributeError):
	        return None

	    if result is None:
	        return None

	    # Persist the raw summary for this user on the call document.
	    update_result = collection.update_one(
	        {"call_id": call_id},
	        {"$set": {f"summaries.{user_id}": result}},
	    )

	    # matched_count, not modified_count: storing a summary identical to the
	    # existing one modifies nothing but is not an error.
	    if update_result.matched_count == 0:
	        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

	    return _strip_summary_prefix(result)


	def term_extraction(collection, call_id, user_id, target_language):
	    """Extract key terms from a call transcript and store them on the call.

	    Args:
	        collection: collection holding the call documents.
	        call_id: ID of the call to analyse.
	        user_id: user whose view of the transcript is analysed; the terms are
	            stored under ``key_terms.<user_id>`` on the call document.
	        target_language: passed through to ``extract_terms``.

	    Returns:
	        Whatever ``extract_terms`` returns, or ``None`` when the call has no
	        captions or the transcript is too short for extraction.

	    Raises:
	        HTTPException: 404 when the call does not exist.
	    """
	    # Term extraction performs poorly on very short transcripts.
	    min_caption_length = 50

	    combined_text = get_caption_text(collection, call_id, user_id)

	    # get_caption_text returns None for a call without captions.
	    if combined_text is None or len(combined_text) <= min_caption_length:
	        return None

	    # Extract key terms from the concatenated caption text.
	    key_terms = extract_terms(combined_text, target_language, len(combined_text))

	    update_result = collection.update_one(
	        {"call_id": call_id},
	        {"$set": {f"key_terms.{user_id}": key_terms}},
	    )

	    # matched_count, not modified_count: re-running extraction can yield the
	    # same terms already stored, which modifies nothing but is not an error.
	    if update_result.matched_count == 0:
	        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Call not updated!")

	    return key_terms