# FundedNext support chatbot: embedding-based context retrieval + ChatGPT answers.
import ast
import os

import numpy as np
import openai
import pandas as pd
from openai.embeddings_utils import get_embedding, cosine_similarity
# Set up OpenAI API key from the environment (never hard-code secrets).
openai.api_key = os.getenv("OPENAI_API_KEY")

# Load the pre-embedded knowledge base.
df = pd.read_csv('data_with_ada_embedding.csv')

# Rough token estimate: ~4 characters per token for English text.
df["token"] = df.combined_summarised.map(len) // 4

# Embeddings are stored as stringified lists in the CSV. Parse them with
# ast.literal_eval: eval() would execute arbitrary code embedded in the file,
# literal_eval only accepts Python literals and yields the same list here.
df['ada_embedding'] = df.ada_embedding.apply(ast.literal_eval).apply(np.array)
# System prompt: fixes the assistant's persona and answering rules for the
# FundedNext support bot. Used as the first message of every conversation.
# (Fixed typos: "sucessfully" -> "successfully", "Fundednext" -> "FundedNext".)
pre_text = """You are a customer service agent of an app called FundedNext. FundedNext is a Proprietary Trading Firm aimed to assist traders with individual funding up to $200,000, backed by a solid strategy to minimise risks.
FundedNext has two account models. Users can go for Either Express Model or Evaluation Model, To get a real funded account. Each model has challenge phase and real phase. After successfully completing the challenge phase without violating any rules, users are eligible for their real trading account.
Express model has two phases. Express Demo and Express Real. Express Demo is the challenge phase. Express users need to pass only one challenge phase to get to Express Real phase.
While traders in the Evaluation model need to pass two challenge phases called Phase 1 and Phase 2. The final phase in Evaluation model is Evaluation Real.
You are supposed to help the users of FundedNext with their questions and provide them with helpful answers.
For each question, you will be given a context. You can use the context to answer the question. You can also use the context to ask follow up questions to the user. You should only answer the question if you are sure of the answer based on the provided context.
"""
def search(df, query, max_n, max_token):
    """Return the summaries most relevant to *query*, joined by blank lines.

    Ranks rows of *df* by cosine similarity between their precomputed
    `ada_embedding` and the query's ada-002 embedding, keeps the top
    *max_n*, then takes rows while the running `token` estimate stays
    under *max_token*.

    Args:
        df: DataFrame with `ada_embedding` (np.ndarray), `token` (int
            estimate) and `combined_summarised` (str) columns.
        query: User question to embed and match against.
        max_n: Maximum number of candidate rows to consider.
        max_token: Token budget for the concatenated context.

    Returns:
        str: Selected `combined_summarised` texts joined with "\n\n"
        (empty string when nothing fits the budget).
    """
    query_embedding = get_embedding(
        query,
        engine="text-embedding-ada-002"
    )
    # Work on a copy: the original mutated the caller's (module-level) frame
    # by permanently attaching similarity/cumulative_sum scratch columns.
    ranked = df.copy()
    ranked["similarity"] = ranked.ada_embedding.apply(
        lambda emb: cosine_similarity(emb, query_embedding)
    )
    ranked = ranked.sort_values("similarity", ascending=False).head(max_n)
    # Running token total over the ranked rows enforces the context budget.
    ranked["cumulative_sum"] = ranked.token.cumsum()
    selected = ranked[ranked["cumulative_sum"] < max_token]["combined_summarised"]
    return '\n\n'.join(selected)
def get_context(query):
    """Build the full prompt for *query*: retrieved context plus the question.

    Pulls up to 10 matching summaries (500-token budget) from the module-level
    knowledge base and wraps them with the question in a fixed template.
    """
    # NOTE(review): the 'β' characters below look like mis-encoded em-dashes /
    # curly quotes from a copy-paste; kept byte-for-byte since they are part of
    # the runtime prompt — confirm the intended characters with the author.
    snippets = search(df, query, max_n=10, max_token=500)
    return f"""I will ask you questions based on the following context:
β Start of Context β
{snippets}
β End of Context β
My question is: β{query}β
"""
# --- Module-level conversation state ---

# Archived history: the system prompt plus every completed user/assistant turn.
messages_archived = [{"role": "system", "content": pre_text}]

# Most recent context-wrapped prompt sent to the model (placeholder until then).
context = "Empty"

# Message list for the in-flight request: archived history + wrapped question.
messages_current = []
def get_reply(message):
    """Answer *message* with retrieval-augmented ChatGPT and archive the turn.

    Returns the assistant's reply, or the literal string "No Message Received"
    when *message* is empty/falsy. Side effects: rebinds the module globals
    `messages_current` and `context`, and appends the raw question plus the
    reply to `messages_archived`.
    """
    global messages_current, context
    # Guard clause: nothing to answer without an incoming message.
    if not message:
        return "No Message Received"
    # Start from the archived history (system prompt + prior turns).
    messages_current = messages_archived.copy()
    # Retrieve supporting context and wrap the question inside it.
    context = get_context(message)
    messages_current.append({"role": "user", "content": context})
    # temperature=0 keeps support answers deterministic.
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo", messages=messages_current, temperature=0
    )
    reply = chat.choices[0].message.content
    # Archive the raw question (not the context-wrapped prompt) and the answer.
    messages_archived.append({"role": "user", "content": message})
    messages_archived.append({"role": "assistant", "content": reply})
    return reply
def clear_variables():
    """Drop the archived conversation, keeping only the system prompt."""
    global messages_archived
    messages_archived = [{"role": "system", "content": pre_text}]
def reset_memory():
    """Trim the archive back to just the system prompt once it holds 21+ entries.

    With two entries appended per answered question, this caps the history at
    roughly ten exchanges before wiping it.
    """
    global messages_archived
    if len(messages_archived) >= 21:
        messages_archived = [{"role": "system", "content": pre_text}]
def get_context_gr():
    """Render the in-flight message list as a string (presumably for a
    Gradio display, per the `_gr` suffix — confirm with the UI code)."""
    return f"{messages_current}"