Curranj committed on
Commit
9f50973
1 Parent(s): 27cd290

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -71
app.py CHANGED
@@ -1,89 +1,92 @@
1
- import openai
2
- import gradio as gr
3
  import sqlite3
4
  import numpy as np
5
  from sklearn.metrics.pairwise import cosine_similarity
 
 
 
 
6
 
7
- openai.api_key = "sk-..." # Replace with your key
8
 
9
- def find_closest_neighbors(vector, dictionary_of_vectors):
10
  """
11
  Takes a vector and a dictionary of vectors and returns the three closest neighbors
12
  """
 
 
 
 
 
 
 
 
 
 
13
  cosine_similarities = {}
14
  for key, value in dictionary_of_vectors.items():
15
  cosine_similarities[key] = cosine_similarity(vector.reshape(1, -1), value.reshape(1, -1))[0][0]
16
 
 
17
  sorted_cosine_similarities = sorted(cosine_similarities.items(), key=lambda x: x[1], reverse=True)
18
  match_list = sorted_cosine_similarities[0:4]
19
-
20
  return match_list
21
 
22
- def handle_input(user_input):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  """
24
- Checks if the user input is a text file or a string.
25
- If it's a text file, it reads the file, splits it into 250-character chunks, and returns the chunks.
26
- If it's a string, it just returns the string.
27
  """
28
- if isinstance(user_input, gr.inputs.File):
29
- with open(user_input.name, 'r') as file:
30
- text = file.read()
31
- chunks = [text[i:i+250] for i in range(0, len(text), 250)]
32
- return chunks
33
- else:
34
- return [user_input]
35
-
36
- def predict(user_input, history):
37
- # Connect to the database
38
- conn = sqlite3.connect('QRIdatabase7 (1).db')
39
- cursor = conn.cursor()
40
- cursor.execute('''SELECT text, embedding FROM chunks''')
41
- rows = cursor.fetchall()
42
-
43
- dictionary_of_vectors = {}
44
- for row in rows:
45
- text = row[0]
46
- embedding_str = row[1]
47
- embedding = np.fromstring(embedding_str, sep=' ')
48
- dictionary_of_vectors[text] = embedding
49
- conn.close()
50
-
51
- input_chunks = handle_input(user_input)
52
-
53
- for message in input_chunks:
54
- # Create embedding for the message
55
- message_vector = openai.Embedding.create(
56
- input=message,
57
- engine="text-embedding-ada-002"
58
- )['data'][0]['embedding']
59
- message_vector = np.array(message_vector)
60
-
61
- # Find the closest neighbors
62
- match_list = find_closest_neighbors(message_vector, dictionary_of_vectors)
63
- context = ''
64
- for match in match_list:
65
- context += str(match[0])
66
- context = context[:-1500]
67
-
68
- prep = f"This is an OpenAI model tuned to answer questions specific to the Qualia Research institute, a research institute that focuses on consciousness. Here is some question-specific context, and then the Question to answer, related to consciousness, the human experience, and phenomenology: {context}. Here is a question specific to QRI and consciousness in general Q: {message} A: "
69
-
70
- history_openai_format = []
71
- for human, assistant in history:
72
- history_openai_format.append({"role": "user", "content": human })
73
- history_openai_format.append({"role": "assistant", "content":assistant})
74
- history_openai_format.append({"role": "user", "content": prep})
75
-
76
- response = openai.ChatCompletion.create(
77
- model='gpt-4',
78
- messages= history_openai_format,
79
- temperature=1.0,
80
- stream=True
81
- )
82
-
83
- partial_message = ""
84
- for chunk in response:
85
- if len(chunk['choices'][0]['delta']) != 0:
86
- partial_message = partial_message + chunk['choices'][0]['delta']['content']
87
- yield partial_message
88
-
89
- gr.ChatInterface(predict, inputs=gr.inputs.Mixed([gr.inputs.Textbox(lines=3), gr.inputs.File()]), allow_flagging=False).queue().launch()
 
1
+
2
+ import sklearn
3
  import sqlite3
4
  import numpy as np
5
  from sklearn.metrics.pairwise import cosine_similarity
6
+ import openai
7
+ import os
8
+
9
+ openai.api_key = os.environ["Secret"]
10
 
 
11
 
12
def find_closest_neighbors(vector1, dictionary_of_vectors, top_k=4):
    """
    Embed a query string and return the stored chunks most similar to it.

    Args:
        vector1: The query text. Despite the name this is a string, not a
            vector; it is embedded here with ``text-embedding-ada-002``.
        dictionary_of_vectors: Mapping of chunk text -> embedding array.
        top_k: Maximum number of matches to return. Defaults to 4, the
            number of matches this function has always returned.

    Returns:
        A list of ``(text, cosine_similarity)`` tuples ordered from most to
        least similar, holding at most ``top_k`` entries. The list is empty
        when ``dictionary_of_vectors`` is empty.
    """
    # Nothing to rank against: skip the (billable) embedding request.
    if not dictionary_of_vectors:
        return []

    # Convert the input string to a vector.
    embedding = openai.Embedding.create(
        input=vector1,
        engine="text-embedding-ada-002",
    )['data'][0]['embedding']

    # cosine_similarity expects 2-D inputs; reshape the query once up front.
    query = np.array(embedding).reshape(1, -1)

    # Score every stored chunk against the query, keyed by chunk text.
    cosine_similarities = {
        key: cosine_similarity(query, value.reshape(1, -1))[0][0]
        for key, value in dictionary_of_vectors.items()
    }

    # Highest similarity first, then keep only the requested number of matches.
    ranked = sorted(
        cosine_similarities.items(),
        key=lambda item: item[1],
        reverse=True,
    )
    return ranked[:top_k]
35
 
36
# Load every stored chunk and its embedding into memory once, at import time,
# so answering a question only needs an in-memory similarity scan.
conn = sqlite3.connect('QRIdatabase7.db')
try:
    cursor = conn.cursor()

    # Select the text and embedding from the chunks table.
    cursor.execute('''SELECT text, embedding FROM chunks''')
    rows = cursor.fetchall()
finally:
    # Always release the database handle, even if the query fails
    # (for example when the chunks table is missing).
    conn.close()

# Map chunk text -> embedding array. Each embedding is parsed from a
# space-separated string of numbers. Rows sharing the same text collapse to
# the last one seen, as dictionary keys are unique.
dictionary_of_vectors = {
    text: np.fromstring(embedding_str, sep=' ')
    for text, embedding_str in rows
}
61
+
62
def context_gpt_response(question):
    """
    Answer a question using the most similar stored chunks as context.

    Args:
        question: The user's question as plain text.

    Returns:
        The text of the model's answer.
    """
    # Upper bound on how many characters of retrieved text go into the prompt.
    max_context_chars = 1500

    # Find the closest neighbors (most similar first).
    match_list = find_closest_neighbors(question, dictionary_of_vectors)

    # Join the matched chunk texts and keep the leading part. The previous
    # slice, [:-1500], removed the trailing 1500 characters instead, which
    # left an empty context whenever the matches were shorter than that.
    context = ''.join(str(match[0]) for match in match_list)[:max_context_chars]

    prep = f"This is an OpenAI model tuned to answer questions specific to the Qualia Research institute, a research institute that focuses on consciousness. Here is some question-specific context, and then the Question to answer, related to consciousness, the human experience, and phenomenology: {context}. Here is a question specific to QRI and consciousness in general Q: {question} A: "

    # gpt-4 is a chat model, so it must be called through the chat endpoint;
    # the plain completions endpoint does not serve it.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prep}],
        temperature=0.7,
        max_tokens=220,
    )

    # Return the answer.
    return response['choices'][0]['message']['content']
88
+
89
+ import gradio as gr
90
+
91
# Sample questions offered in the UI; each inner list is one example input.
example_questions = [
    ["What is QRI?"],
    ["What is the Symmetry Theory of Valence?"],
    ["Explain Logarithmic scales of pain and pleasure"],
]

# Single text-in / text-out interface backed by context_gpt_response.
iface = gr.Interface(
    fn=context_gpt_response,
    inputs="text",
    outputs="text",
    title="Qualia Research Institute GPTbot",
    description="Ask any question and get QRI specific answers!",
    examples=example_questions,
)
iface.launch()