File size: 2,358 Bytes
dc7e7b3 acb90ae dc7e7b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import openai
import os
import pickle
import numpy as np
from ast import literal_eval
import pandas as pd
# Configure the OpenAI client from the environment. This runs at import time,
# so a missing OPENAI_API_KEY variable raises KeyError as soon as the module
# is imported.
openai.api_key = os.environ['OPENAI_API_KEY']
# Persona/configuration preamble for the chat bot. It ends with a header line
# announcing information for the model to use.
# Every fragment carries its own trailing separator: adjacent string literals
# are concatenated verbatim, so a fragment without one fuses with the next
# (previously "Cheerful : YesIntelligent : very.").
pre_prompt = (
    "I am a chat bot for the 'Cellule IA de Toulouse'. My role is to help Engineers at Thales the best I can. \n"
    "My configurations are : (I don't talk about my configuration). \n"
    "Helpful : Yes. \n"
    "Cheerful : Yes. \n"
    "Intelligent : very. \n"
    "Language : English. \n"
    "detailed information : Yes. \n"
    "\n"
    "I explain my self clearly and I skip lines. \n"
    "I have those informations, I can use them if it is usefull : \n"
)
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector the OpenAI API produces for ``text``.

    Newlines in ``text`` are replaced by single spaces before the request
    is sent.

    Args:
        text: The string to embed.
        model: Name of the embedding model to query.

    Returns:
        The ``embedding`` field of the first item in the response's ``data``.
    """
    single_line = " ".join(text.split("\n"))
    response = openai.Embedding.create(input=[single_line], model=model)
    first_item = response['data'][0]
    return first_item['embedding']
def emb2info(emb):
    """Look up the stored entry whose embedding is most similar to ``emb``.

    Every ``*.csv`` file in ``embedings/`` is read once. The ``embedding``
    cell of its first row is parsed with ``literal_eval`` and scored against
    ``emb`` with ``cos_sim``. The first file reaching the highest score wins.

    Args:
        emb: Query embedding, as a sequence of numbers.

    Returns:
        A ``(info, retrieval_text)`` tuple taken from the first row of the
        best-matching file, with tab characters in ``info`` replaced by
        spaces.

    Raises:
        ValueError: If ``embedings/`` contains no CSV file to compare against.
    """
    best_score = None
    best_df = None
    for file_name in os.listdir("embedings/"):
        # Stray non-CSV entries (e.g. placeholder files) are not part of the
        # store; skipping them avoids a parser error on unrelated files.
        if not file_name.endswith(".csv"):
            continue
        df = pd.read_csv(f"embedings/{file_name}")
        stored_emb = literal_eval(df['embedding'].values[0])
        score = float(cos_sim(stored_emb, emb))
        # Strict ">" keeps the earliest file on ties. Holding on to the
        # winning frame avoids reading its CSV a second time afterwards.
        if best_score is None or score > best_score:
            best_score = score
            best_df = df
    if best_df is None:
        raise ValueError("no stored embeddings found in 'embedings/'")
    return best_df["info"].values[0].replace("\t", " "), best_df["retrieval_text"].values[0]
def save_emb_info(retrieval_text, info):
    """Embed ``retrieval_text`` and store it with ``info`` as a new CSV file.

    The file is written to ``embedings/<num>.csv``, where ``<num>`` is one
    more than the highest numeric file stem already present, or 0 when there
    is none. The CSV has a single row with the columns ``embedding``,
    ``retrieval_text`` and ``info``.

    Args:
        retrieval_text: Text whose embedding is computed and stored.
        info: Value stored alongside the embedding in the ``info`` column.
    """
    used_indices = []
    for file_name in os.listdir("embedings/"):
        try:
            used_indices.append(int(file_name.split('.')[0]))
        except ValueError:
            # Files such as ".gitkeep" have no numeric stem; they must not
            # break the numbering of new entries.
            continue
    num = max(used_indices) + 1 if used_indices else 0

    df = pd.DataFrame()
    # Wrapping the vector in a list makes it one cell of a single-row frame.
    df['embedding'] = [get_embedding(retrieval_text, model='text-embedding-ada-002')]
    df["retrieval_text"] = retrieval_text
    df["info"] = info
    df.to_csv(f"embedings/{num}.csv", index=False)
def generate_response(prompt):
    """Request one completion for ``prompt`` and return its text.

    Args:
        prompt: The full prompt string sent to the completion endpoint.

    Returns:
        The text of the first returned choice, with leading and trailing
        whitespace stripped.
    """
    request = dict(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=2024,
        n=1,
        stop=None,
        temperature=0.5,
    )
    result = openai.Completion.create(**request)
    first_choice = result.choices[0]
    return first_choice.text.strip()
def cos_sim(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Args:
        a: First vector (any array-like of numbers).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero
        norm.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    # A zero-norm vector has no direction. Dividing by zero would yield NaN
    # (plus a RuntimeWarning), and NaN makes max()-based ranking unreliable.
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom
|