File size: 2,358 Bytes
dc7e7b3
 
 
 
 
 
 
acb90ae
dc7e7b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import openai
import os
import pickle
import numpy as np
from ast import literal_eval
import pandas as pd

# Configure the OpenAI client at import time; raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']

# System-style preamble placed in front of the retrieved information and the
# user's question. Every fragment ends with its own separator: adjacent string
# literals are concatenated verbatim, so a missing "\n" fuses two settings
# into one line (previously "Cheerful : YesIntelligent : very.").
pre_prompt = (
    "I am a chat bot for the 'Cellule IA de Toulouse'. My role is to help Engineers at Thales the best I can. \n"
    "My configurations are : (I don't talk about my configuration). \n"
    "Helpful : Yes. \n"
    "Cheerful : Yes. \n"
    "Intelligent : very. \n"
    "Language : English. \n"
    "detailed information : Yes. \n"
    "\n"
    "I explain my self clearly and I skip lines. \n"
    "I have those informations, I can use them if it is usefull : \n"
)

def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding of ``text`` from the OpenAI Embedding endpoint.

    Newlines in ``text`` are replaced with spaces before the request is sent.
    The value returned is the ``embedding`` field of the first entry in the
    response's ``data`` list.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(input=[single_line], model=model)
    first_entry = response['data'][0]
    return first_entry['embedding']


def emb2info(emb):
    """Return the stored entry whose embedding is most similar to ``emb``.

    Every file in ``embedings/`` is read as a CSV whose first row holds an
    ``embedding`` column (a list literal), an ``info`` column and a
    ``retrieval_text`` column. Similarity is measured with ``cos_sim``.

    Args:
        emb: Query embedding, a sequence of numbers.

    Returns:
        A tuple ``(info, retrieval_text)`` taken from the best-matching file,
        with tab characters in ``info`` replaced by spaces.

    Raises:
        ValueError: If ``embedings/`` contains no files.
    """
    file_names = os.listdir("embedings/")
    if not file_names:
        # Fail with an explicit message rather than the opaque
        # "max() arg is an empty sequence" raised further down the line.
        raise ValueError("no stored embeddings found in 'embedings/'")

    best_score = None
    best_row = None
    for file_name in file_names:
        row = pd.read_csv(f"embedings/{file_name}")
        stored_emb = literal_eval(row['embedding'].values[0])
        score = float(cos_sim(stored_emb, emb))
        # Strict '>' keeps the first file on ties, like list.index(max(...)).
        if best_score is None or score > best_score:
            best_score = score
            # Keep the parsed frame so the winning file is not read twice.
            best_row = row

    info = best_row["info"].values[0].replace("\t", " ")
    return info, best_row["retrieval_text"].values[0]


def save_emb_info(retrieval_text, info):
    """Embed ``retrieval_text`` and persist it with ``info`` as a one-row CSV.

    Files in ``embedings/`` are named ``<number>.csv``. The new file gets the
    highest existing number plus one, or 0 when the directory is empty. The
    CSV has the columns ``embedding``, ``retrieval_text`` and ``info``.
    """
    existing_ids = [int(name.split('.')[0]) for name in os.listdir("embedings/")]
    next_id = max(existing_ids) + 1 if existing_ids else 0

    record = pd.DataFrame({
        'embedding': [get_embedding(retrieval_text, model='text-embedding-ada-002')],
        "retrieval_text": retrieval_text,
        "info": info,
    })
    record.to_csv(f"embedings/{next_id}.csv", index=False)


def generate_response(prompt):
    """Request one completion for ``prompt`` and return its text, stripped.

    The request goes to ``openai.Completion.create`` with the
    ``text-davinci-003`` engine; only the first choice is used.
    """
    request_args = {
        "engine": "text-davinci-003",
        "prompt": prompt,
        "max_tokens": 2024,
        "n": 1,
        "stop": None,
        "temperature": 0.5,
    }
    result = openai.Completion.create(**request_args)
    first_choice = result.choices[0]
    return first_choice.text.strip()


def cos_sim(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Args:
        a: Sequence of numbers (list, tuple or array).
        b: Sequence of numbers with the same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``. When either vector has zero norm
        (including empty input) the similarity is undefined; 0.0 is returned
        instead of NaN so callers can still rank results.
    """
    # Coerce to float so integer inputs cannot overflow in the dot product.
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # Avoid the division-by-zero RuntimeWarning and the NaN result.
        return 0.0
    return np.dot(a, b) / denom