fundednext_chatbot / helper_functions.py
rkoushikroy2's picture
Upload 4 files
8e66315
raw
history blame
3.54 kB
from openai.embeddings_utils import get_embedding, cosine_similarity
import os
import openai
import pandas as pd
import numpy as np
# Set up OpenAI API key
# Read from the environment so the key is never hard-coded in source.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Load data
# Each row holds a summarised FAQ/doc chunk plus its precomputed
# text-embedding-ada-002 vector (stored as a stringified list in the CSV).
df = pd.read_csv('data_with_ada_embedding.csv')
# Rough token estimate: ~4 characters per token (heuristic, not a tokenizer).
df["token"] = df.combined_summarised.map(len)//4
# Parse the stringified embedding back into a numpy array.
# NOTE(review): eval() on CSV contents executes arbitrary code if the file is
# ever attacker-controlled — ast.literal_eval would be safer; confirm the CSV
# is trusted before shipping.
df['ada_embedding'] = df.ada_embedding.apply(eval).apply(np.array)
# System prompt sent as the first message of every conversation.
# NOTE(review): "sucessfully" is a typo in the prompt text; left byte-identical
# here since the string is model-facing behavior.
pre_text = """You are a customer service agent of an app called FundedNext. FundedNext is a Proprietary Trading Firm aimed to assist traders with individual funding up to $200,000, backed by a solid strategy to minimise risks.
Fundednext has two account models. Users can go for Either Express Model or Evaluation Model, To get a real funded account. Each model has challenge phase and real phase. After sucessfully completing the challenge phase without violating any rules, users are eligible for their real trading account.
Express model has two phases. Express Demo and Express Real. Express Demo is the challenge phase. Express users need to pass only one challenge phase to get to Express Real phase.
While traders in the Evaluation model need to pass two challenge phases called Phase 1 and Phase 2. The final phase in Evaluation model is Evaluation Real.
You are supposed to help the users of FundedNext with their questions and provide them with helpful answers.
For each question, you will be given a context. You can use the context to answer the question. You can also use the context to ask follow up questions to the user. You should only answer the question if you are sure of the answer based on the provided context.
"""
def search(df, query, max_n, max_token):
    """Return the most relevant summarised chunks for *query*, joined by blank lines.

    Embeds the query with text-embedding-ada-002, ranks rows of *df* by cosine
    similarity against their precomputed ``ada_embedding`` vectors, keeps the
    top *max_n* rows, and then takes rows from the top until the running token
    estimate (``token`` column) reaches *max_token*.

    Args:
        df: DataFrame with ``ada_embedding``, ``token`` and
            ``combined_summarised`` columns.
        query: User question to embed and match against.
        max_n: Maximum number of candidate rows to consider.
        max_token: Budget for the cumulative token estimate of returned chunks.

    Returns:
        str: Selected ``combined_summarised`` texts joined with ``"\\n\\n"``
        (empty string if even the best chunk exceeds the budget).
    """
    query_embedding = get_embedding(
        query,
        engine="text-embedding-ada-002"
    )
    # Fix: work on a copy so the caller's (module-level) DataFrame is not
    # permanently polluted with per-query similarity/cumulative_sum columns.
    ranked = df.copy()
    ranked["similarity"] = ranked.ada_embedding.apply(
        lambda emb: cosine_similarity(emb, query_embedding)
    )
    ranked = ranked.sort_values("similarity", ascending=False).head(max_n)
    # Greedy budget cut: keep top rows while the running token total stays
    # under max_token.
    ranked["cumulative_sum"] = ranked.token.cumsum()
    selected = ranked.loc[ranked["cumulative_sum"] < max_token, "combined_summarised"]
    return '\n\n'.join(selected)
def get_context(query):
    """Wrap the top search results and the user's question into one prompt.

    Retrieves up to 10 chunks (max ~500 estimated tokens) for *query* and
    frames them between start/end context markers, followed by the question.
    """
    snippets = search(df, query, max_n=10, max_token=500)
    lines = [
        "I will ask you questions based on the following context:",
        "— Start of Context —",
        snippets,
        "— End of Context —",
        f"My question is: “{query}”",
        "",  # trailing newline, matching the original template
    ]
    return "\n".join(lines)
# Conversation state (module-level, mutated by get_reply/clear_variables/reset_memory):
# full chat history sent to the model, seeded with the system prompt.
messages_archived = [
    {"role": "system", "content": pre_text}
]
# Last retrieval-augmented prompt built for the most recent user message.
context = "Empty"
# Message list actually sent on the most recent ChatCompletion call
# (history copy + the context-wrapped question).
messages_current = []
def get_reply(message):
    """Answer *message* with gpt-3.5-turbo using retrieval-augmented context.

    Builds a context-wrapped prompt for the message, sends it along with the
    archived conversation history, records the raw question and the model's
    reply in the history, and returns the reply. Falsy input short-circuits
    to the string "No Message Received".
    """
    global context, messages_current
    # Guard clause: nothing to send.
    if not message:
        return "No Message Received"
    # Fresh request list = full history + this question wrapped in context.
    messages_current = messages_archived.copy()
    context = get_context(message)
    messages_current.append({"role": "user", "content": context})
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo", messages=messages_current, temperature=0
    )
    reply = chat.choices[0].message.content
    # Archive the bare question (without retrieval context) plus the answer.
    messages_archived.append({"role": "user", "content": message})
    messages_archived.append({"role": "assistant", "content": reply})
    return reply
def clear_variables():
    """Wipe the conversation history back to just the system prompt."""
    global messages_archived
    messages_archived = [{"role": "system", "content": pre_text}]
def reset_memory():
    """Drop the history once it reaches 21 messages (system + 10 exchanges).

    Keeps the prompt from growing past the model's context window by
    restarting from the system prompt alone.
    """
    global messages_archived
    if len(messages_archived) < 21:
        return
    messages_archived = [{"role": "system", "content": pre_text}]
def get_context_gr():
    """Return the last request's message list rendered as a string (for UI display)."""
    return f"{messages_current}"