File size: 2,683 Bytes
9f50973
 
007946f
 
 
9f50973
 
25f3cc2
9f50973
 
007946f
 
9f50973
007946f
 
 
9f50973
 
 
 
7f28337
9f50973
 
007946f
 
 
 
 
 
9f50973
7f28337
9f50973
7f28337
 
23a506b
7f28337
 
 
 
 
 
 
 
 
 
 
9f50973
 
7f28337
9f50973
 
7f28337
9f50973
 
7f28337
9f50973
7f28337
 
 
 
 
9f50973
7f28337
8f3da7e
7f28337
 
 
 
 
 
 
 
 
 
9f50973
7f28337
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77

import sklearn
import sqlite3
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import openai
import os
import gradio as gr

# Configure the OpenAI client from the environment variable named "Secret".
# The subscript lookup raises KeyError at import time if the variable is unset.
openai.api_key = os.environ["Secret"] 


def find_closest_neighbors(vector1, dictionary_of_vectors):
    """
    Embed ``vector1`` and rank the stored vectors by similarity to it.

    ``vector1`` is sent as the ``input`` of a ``text-embedding-ada-002``
    embedding request. The returned embedding is compared, by cosine
    similarity, with every value of ``dictionary_of_vectors`` (each value must
    support ``.reshape``, e.g. a NumPy array).

    Returns a list of at most four ``(key, similarity)`` tuples, ordered from
    most to least similar.
    """
    embedding_response = openai.Embedding.create(
        engine="text-embedding-ada-002",
        input=vector1,
    )
    # cosine_similarity works on 2-D inputs, so the query becomes a single row.
    query_row = np.array(embedding_response['data'][0]['embedding']).reshape(1, -1)

    scored = [
        (key, cosine_similarity(query_row, candidate.reshape(1, -1))[0][0])
        for key, candidate in dictionary_of_vectors.items()
    ]
    # Stable sort: entries with equal similarity keep their dictionary order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    return scored[:4]

def _load_chunk_embeddings(db_path):
    """Return a ``{text: embedding}`` dict read from the ``chunks`` table of *db_path*."""
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute('''SELECT text, embedding FROM chunks''').fetchall()
    finally:
        # Release the connection even when the query itself fails.
        conn.close()

    # Each embedding column value is parsed as space-separated numbers.
    return {
        text: np.fromstring(embedding_str, sep=' ')
        for text, embedding_str in rows
    }


def predict(message, history):
    """
    Stream a GPT-4 answer to ``message`` using retrieved chunks as context.

    Steps:
      1. Load every ``(text, embedding)`` row from the SQLite database.
      2. Ask ``find_closest_neighbors`` for the chunks closest to ``message``.
      3. Build a prompt containing those chunks and the question, append it to
         the prior conversation, and request a streamed chat completion.

    Args:
        message: The user's latest question.
        history: Iterable of ``(user_text, assistant_text)`` pairs from the
            earlier turns of the conversation.

    Yields:
        The answer accumulated so far, once per streamed chunk that carries
        text, so the caller can render the reply incrementally.
    """
    dictionary_of_vectors = _load_chunk_embeddings('QRIdatabase7 (2).db')

    # Find the closest neighbors and concatenate their texts.
    match_list = find_closest_neighbors(message, dictionary_of_vectors)
    context = ''.join(str(match[0]) for match in match_list)
    # NOTE(review): this slice drops the LAST 1500 characters, so the context
    # is empty whenever the combined text is 1500 characters or shorter. If
    # the intent was to cap the context length, `context[:1500]` is probably
    # what was meant. Behaviour kept as-is pending confirmation.
    context = context[:-1500]

    prep = f"This is an OpenAI model tuned to answer questions specific to the Qualia Research institute, a research institute that focuses on consciousness. Here is some question-specific context, and then the Question to answer, related to consciousness, the human experience, and phenomenology: {context}. Here is a question specific to QRI and consciousness in general Q:  {message}  A: "

    # Replay earlier turns in the chat-completions message format, then add
    # the context-augmented question as the newest user message.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": prep})

    response = openai.ChatCompletion.create(
        model='gpt-4',
        messages=history_openai_format,
        temperature=1.0,
        stream=True
    )

    partial_message = ""
    for chunk in response:
        delta = chunk['choices'][0]['delta']
        if len(delta) == 0:
            # The final chunk of a stream carries an empty delta.
            continue
        # A delta may carry only a role (or a null content); indexing
        # ['content'] directly would raise on such chunks.
        content = delta.get('content')
        if content is None:
            continue
        partial_message = partial_message + content
        yield partial_message

# Build a Gradio chat interface around `predict` (a generator that yields the
# growing reply), enable the request queue, and start the app on import/run.
gr.ChatInterface(predict).queue().launch()