sara / utils.py
ULMER Louis (T0240644)
adding openaikey as secret
acb90ae
raw
history blame contribute delete
No virus
2.36 kB
import openai
import os
import pickle
import numpy as np
from ast import literal_eval
import pandas as pd
# Configure the OpenAI client at import time from the OPENAI_API_KEY
# environment variable; a missing variable raises KeyError right here.
openai.api_key = os.environ['OPENAI_API_KEY']
# Persona/configuration preamble for the chat bot, ending with the lead-in
# line after which retrieved information is meant to follow.
# Every fragment ends with ". \n" so each setting sits on its own line;
# a fragment without it gets glued to the next one by implicit concatenation
# (e.g. "Cheerful : YesIntelligent : very.").
pre_prompt = (
    "I am a chat bot for the 'Cellule IA de Toulouse'. My role is to help Engineers at Thales the best I can. \n"
    "My configurations are : (I don't talk about my configuration). \n"
    "Helpful : Yes. \n"
    "Cheerful : Yes. \n"
    "Intelligent : very. \n"
    "Language : English. \n"
    "detailed information : Yes. \n"
    "\n"
    "I explain my self clearly and I skip lines. \n"
    "I have those informations, I can use them if it is usefull : \n"
)
def get_embedding(text, model="text-embedding-ada-002"):
    """Request an embedding for ``text`` from the OpenAI embeddings endpoint.

    Newline characters in ``text`` are replaced by spaces before the request
    is made.

    Args:
        text: the string to embed.
        model: name of the embedding model passed to the API.

    Returns:
        The ``'embedding'`` entry of the first item in the response's
        ``'data'`` list.
    """
    flattened = text.replace("\n", " ")
    response = openai.Embedding.create(input=[flattened], model=model)
    first_item = response['data'][0]
    return first_item['embedding']
def emb2info(emb):
    """Return the stored entry whose embedding is most similar to ``emb``.

    Each ``.csv`` file in ``embedings/`` is read exactly once. The embedding
    stored in its first row is parsed with ``literal_eval`` and compared to
    ``emb`` using ``cos_sim``; the file with the highest similarity wins
    (the first one in directory-listing order on ties).

    Args:
        emb: query embedding, a sequence of numbers with the same length as
            the stored embeddings.

    Returns:
        A tuple ``(info, retrieval_text)`` taken from the first row of the
        best-matching file, with tab characters in ``info`` replaced by
        spaces.

    Raises:
        ValueError: if ``embedings/`` contains no ``.csv`` file.
    """
    # Skip anything that is not a CSV (e.g. placeholder files) instead of
    # letting pandas fail on it.
    file_names = [name for name in os.listdir("embedings/") if name.endswith(".csv")]
    if not file_names:
        raise ValueError("no stored embeddings found in 'embedings/'")

    # Keep the loaded frames so the winner does not have to be re-read.
    frames = [pd.read_csv(f"embedings/{name}") for name in file_names]
    scores = [
        float(cos_sim(literal_eval(frame['embedding'].values[0]), emb))
        for frame in frames
    ]
    best = frames[scores.index(max(scores))]
    return best["info"].values[0].replace("\t", " "), best["retrieval_text"].values[0]
def save_emb_info(retrieval_text, info):
    """Embed ``retrieval_text`` and store it with ``info`` as a new CSV file.

    The file is written to ``embedings/<num>.csv`` where ``<num>`` is one
    more than the highest numeric file stem already present (``0`` when
    there is none). The directory is created if it does not exist.

    Args:
        retrieval_text: text to embed; stored in the ``retrieval_text`` column.
        info: payload stored in the ``info`` column.

    Returns:
        None. The single-row CSV has columns ``embedding``,
        ``retrieval_text`` and ``info``.
    """
    os.makedirs("embedings", exist_ok=True)

    # Only purely numeric stems take part in numbering; other files in the
    # directory (e.g. ".gitkeep") would otherwise make int() raise.
    stems = (name.split('.')[0] for name in os.listdir("embedings/"))
    used_numbers = [int(stem) for stem in stems if stem.isdecimal()]
    num = max(used_numbers) + 1 if used_numbers else 0

    df = pd.DataFrame()
    # Wrapping the vector in a list makes it a single cell of a one-row frame.
    df['embedding'] = [get_embedding(retrieval_text, model='text-embedding-ada-002')]
    df["retrieval_text"] = retrieval_text
    df["info"] = info
    df.to_csv(f"embedings/{num}.csv", index=False)
def generate_response(prompt):
    """Ask the OpenAI completion endpoint to continue ``prompt``.

    Args:
        prompt: full prompt text sent to the ``text-davinci-003`` engine.

    Returns:
        The text of the first returned choice, stripped of leading and
        trailing whitespace.
    """
    request = {
        "engine": "text-davinci-003",
        "prompt": prompt,
        "max_tokens": 2024,
        "n": 1,
        "stop": None,
        "temperature": 0.5,
    }
    completions = openai.Completion.create(**request)
    first_choice = completions.choices[0]
    return first_choice.text.strip()
def cos_sim(a, b):
    """Return the cosine similarity of ``a`` and ``b``.

    Args:
        a: array-like of numbers.
        b: array-like of numbers, same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``. When either input has zero
        norm the similarity is undefined; ``0.0`` is returned instead of
        the NaN (and RuntimeWarning) a 0/0 division would produce, so the
        result can still be ranked with ``max``.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return np.float64(0.0)
    return np.dot(a, b) / denom