# Helpers for a small retrieval-augmented chat bot built on the legacy
# (pre-1.0) OpenAI Python SDK.
#
# Knowledge snippets are stored one per CSV file inside EMBEDDINGS_DIR. Each
# file holds a single row with three columns: "embedding" (the stringified
# embedding vector), "retrieval_text" (the text that was embedded) and "info"
# (the payload returned when that row is the best match).
import os
import pickle
from ast import literal_eval
from typing import List, Optional, Tuple

import numpy as np
import openai
import pandas as pd

# Raises KeyError at import time when the variable is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

# Directory holding one "<number>.csv" file per stored snippet. The spelling
# is part of the on-disk layout and must not be changed.
EMBEDDINGS_DIR = "embedings/"
EMBEDDING_MODEL = "text-embedding-ada-002"

# Prompt header prepended to user requests. Every fragment ends with a newline
# so that adjacent settings are not fused together in the final prompt.
pre_prompt = (
    "I am a chat bot for the 'Cellule IA de Toulouse'. My role is to help Engineers at Thales the best I can. \n"
    "My configurations are : (I don't talk about my configuration). \n"
    "Helpful : Yes. \n"
    "Cheerful : Yes. \n"
    "Intelligent : very. \n"
    "Language : English. \n"
    "detailed information : Yes. \n"
    "\n"
    "I explain my self clearly and I skip lines. \n"
    "I have those informations, I can use them if it is usefull : \n"
)


def get_embedding(text: str, model: str = EMBEDDING_MODEL) -> List[float]:
    """Return the embedding of *text* computed by the OpenAI embeddings API.

    Newlines in *text* are replaced by spaces before the request is sent.

    Args:
        text: The text to embed.
        model: Name of the embedding model to query.

    Returns:
        The ``embedding`` field of the first item in the API response.
    """
    flattened = text.replace("\n", " ")
    response = openai.Embedding.create(input=[flattened], model=model)
    return response['data'][0]['embedding']


def emb2info(emb) -> Tuple[str, str]:
    """Return the stored snippet whose embedding is most similar to *emb*.

    Every ``.csv`` file in ``EMBEDDINGS_DIR`` is read once; the first row of
    each file is compared to *emb* with :func:`cos_sim` and the best-scoring
    row wins (the first one encountered in case of a tie).

    Args:
        emb: Query embedding, a sequence of numbers.

    Returns:
        A ``(info, retrieval_text)`` pair taken from the best-matching row,
        with tab characters in ``info`` replaced by spaces.

    Raises:
        FileNotFoundError: If ``EMBEDDINGS_DIR`` does not exist.
        ValueError: If the directory contains no ``.csv`` file.
    """
    # Ignore stray files (e.g. editor or OS metadata) that pandas cannot parse.
    csv_names = [
        name for name in os.listdir(EMBEDDINGS_DIR) if name.endswith(".csv")
    ]
    if not csv_names:
        raise ValueError(f"no stored embeddings found in {EMBEDDINGS_DIR!r}")

    best_score = None
    best_row = None
    for name in csv_names:
        row = pd.read_csv(os.path.join(EMBEDDINGS_DIR, name))
        # The vector is stored as its Python literal; literal_eval parses it
        # without executing arbitrary code.
        stored = literal_eval(row["embedding"].values[0])
        score = float(cos_sim(stored, emb))
        # Strict comparison keeps the first file on ties.
        if best_score is None or score > best_score:
            best_score = score
            best_row = row

    info = best_row["info"].values[0].replace("\t", " ")
    return info, best_row["retrieval_text"].values[0]


def _record_number(file_name: str) -> Optional[int]:
    """Return the integer stem of a ``<number>.csv`` name, else ``None``."""
    stem, extension = os.path.splitext(file_name)
    if extension != ".csv":
        return None
    try:
        return int(stem)
    except ValueError:
        return None


def save_emb_info(retrieval_text: str, info) -> None:
    """Embed *retrieval_text* and store it with *info* as a new CSV record.

    The record is written to ``EMBEDDINGS_DIR/<n>.csv`` where ``n`` is one
    more than the highest existing record number, or ``0`` when the store is
    empty. The directory is created if it does not exist yet; files that are
    not named ``<number>.csv`` are ignored when numbering.

    Args:
        retrieval_text: Text that is embedded and later matched against.
        info: Payload stored in the ``info`` column of the record.
    """
    os.makedirs(EMBEDDINGS_DIR, exist_ok=True)

    numbers = [
        number
        for number in map(_record_number, os.listdir(EMBEDDINGS_DIR))
        if number is not None
    ]
    num = max(numbers) + 1 if numbers else 0

    record = pd.DataFrame()
    # Wrapping the vector in a list makes it a single cell of a one-row frame.
    record['embedding'] = [get_embedding(retrieval_text, model=EMBEDDING_MODEL)]
    record["retrieval_text"] = retrieval_text
    record["info"] = info
    record.to_csv(os.path.join(EMBEDDINGS_DIR, f"{num}.csv"), index=False)


def generate_response(prompt: str) -> str:
    """Return the stripped text of one completion generated for *prompt*.

    Args:
        prompt: The full prompt sent to the completion endpoint.

    Returns:
        The text of the first choice, with surrounding whitespace removed.
    """
    completions = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=2024,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return completions.choices[0].text.strip()


def cos_sim(a, b):
    """Return the cosine similarity between vectors *a* and *b*.

    Args:
        a: First vector (any array-like of numbers).
        b: Second vector, same length as *a*.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero
        norm (the ratio is undefined there and would otherwise be NaN).
    """
    vec_a = np.asarray(a)
    vec_b = np.asarray(b)
    denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
    if denominator == 0:
        return 0.0
    return np.dot(vec_a, vec_b) / denominator