long1104 committed on
Commit
9587bef
·
verified ·
1 Parent(s): a8adbc9

Upload setup_code.py

Browse files
Files changed (1) hide show
  1. setup_code.py +219 -0
setup_code.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import io
3
+ import os
4
+ import warnings
5
+ import numpy as np
6
+ import time
7
+ from matplotlib import pyplot as plt
8
+ import math
9
+ from IPython.display import display
10
+ from PIL import Image, ImageDraw
11
+ import getpass
12
+ from transformers import AutoTokenizer, AutoModel
13
+ import langchain
14
+ from langchain_openai import OpenAIEmbeddings
15
+ from langchain.vectorstores import Pinecone
16
+ from pinecone import Pinecone, ServerlessSpec
17
+ from tqdm.notebook import tqdm
18
+ import openai
19
+ from openai import OpenAI
20
+ import string
21
+ import pandas as pd
22
+ import urllib.request
23
+ from io import BytesIO
24
+ import pillow_heif
25
+ from itertools import islice
26
+ from sklearn.metrics.pairwise import cosine_similarity
27
+ import gc
28
+ import ast
29
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
30
+ from sentence_transformers import SentenceTransformer
31
+ import streamlit as st
32
+ import re
33
+ import Levenshtein
34
+ from tabulate import tabulate
35
+ #from stability_sdk import client
36
+ #import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
37
+
38
+
39
open_ai_key_file = "open_ai_key.txt"  # Your OPEN AI Key in this file

# Only the first line of the key file is used.
with open(open_ai_key_file, "r") as f:
    line = f.readline()

if line == "":
    # An empty key file previously surfaced later as a confusing NameError.
    raise ValueError(f"OpenAI key file {open_ai_key_file!r} is empty")

OPENAI_KEY = line.strip()
# NOTE: kept as the raw first line (it may still carry a trailing newline)
# so that any existing consumer of this name sees the same value as before.
OPEN_AI_API_KEY = line

# GETTING OpenAI and Pinecone api key
openai.api_key = OPENAI_KEY

# SECURITY NOTE(review): this Pinecone API key is committed in plain text.
# It should be rotated and loaded from a key file or environment variable,
# the same way the OpenAI key is loaded above.
pc_apikey = "959aded5-f2fe-4f9e-966c-3c7bd5907202"

# Shared module-level client (the original created it twice with the same key).
openai_client = OpenAI(api_key=openai.api_key)
57
+
58
+
59
def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
    """Return the embedding of ``text`` computed by an OpenAI embedding model.

    Newline characters in ``text`` are replaced by spaces, the cleaned text
    is sent as a one-element batch to ``openai_client.embeddings.create``,
    and the embedding of that single element is returned.
    """
    single_line_text = text.replace("\n", " ")
    response = openai_client.embeddings.create(
        input=[single_line_text],
        model=model,
    )
    first_result = response.data[0]
    return first_result.embedding
63
+
64
def display_image_grid(image_caption_tuples):
    """Show images with their captions in a grid that is five columns wide.

    Each element of ``image_caption_tuples`` is an ``(image, caption)`` pair.
    When the image part is a string it is opened with ``Image.open``;
    anything else is handed to ``plt.imshow`` unchanged.
    """
    columns = 5
    total_images = len(image_caption_tuples)
    rows = math.ceil(total_images / columns)

    # Four units of figure height per grid row.
    plt.figure(figsize=(20, rows * 4))

    for position, (source, caption) in enumerate(image_caption_tuples, start=1):
        picture = Image.open(source) if isinstance(source, str) else source

        plt.subplot(rows, columns, position)
        plt.imshow(picture)
        plt.title(caption, fontsize=10)  # caption is shown as the subplot title
        plt.axis('off')  # hide the axes

    plt.tight_layout()
    plt.show()
90
+
91
def get_completion(client, prompt, model="gpt-3.5-turbo"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        client: OpenAI client used for the request. When ``None``, the
            module-level ``openai_client`` is used instead.
        prompt: Text sent as the content of the user message.
        model: Name of the chat model to query.

    Returns:
        The message content of the first choice in the response.
    """
    # The original ignored ``client`` and always used the global client;
    # the global is now only a fallback for callers that pass None.
    active_client = client if client is not None else openai_client

    message = {"role": "user", "content": prompt}
    # Show a Streamlit spinner while the request is in flight.
    with st.spinner("Generating ..."):
        response = active_client.chat.completions.create(
            model=model,
            messages=[message],
        )
    return response.choices[0].message.content
99
+
100
def query_pinecone_vector_store(index, query_embeddn, top_k=5, namespace=None):
    """Query ``index`` for the ``top_k`` nearest vectors to ``query_embeddn``.

    Args:
        index: Object exposing a ``query`` method that accepts the keyword
            arguments used below.
        query_embeddn: Query vector passed as ``vector``.
        top_k: Number of matches to request.
        namespace: Namespace to search. When ``None`` the namespace is
            obtained from ``get_namespace(index)`` as before.

    Returns:
        Whatever ``index.query`` returns; vector values and metadata are
        requested for every match.
    """
    # NOTE(review): ``get_namespace`` is not defined or imported in this
    # file, so callers should pass ``namespace`` explicitly unless that
    # helper is provided elsewhere.
    ns = namespace if namespace is not None else get_namespace(index)

    return index.query(
        namespace=ns,
        top_k=top_k,
        vector=query_embeddn,
        include_values=True,
        include_metadata=True,
    )
110
+
111
def get_top_k_text(matches, top_k=5):
    """Join the ``text`` metadata of the first ``top_k`` matches with spaces.

    Args:
        matches: Mapping-like query result whose ``'matches'`` entry is a
            sequence of items carrying ``['metadata']['text']``.
        top_k: Maximum number of matches to include (default 5, the count
            the original hard-coded).

    Returns:
        The texts joined by single spaces; an empty string when there are
        no matches.
    """
    # The original indexed positions 0..4 unconditionally and raised
    # IndexError whenever fewer than five matches were returned.
    hits = matches.get('matches') or []
    return ' '.join(hit['metadata']['text'] for hit in hits[:top_k])
118
+
119
def get_top_filename(matches):
    """Format the first match as ``"[<filename>]: <text>"``.

    Reads the ``filename`` and ``text`` entries from the metadata of the
    first item in ``matches.get('matches')``.
    """
    top_metadata = matches.get('matches')[0]['metadata']
    filename = top_metadata['filename']
    text = top_metadata['text']
    return "[{}]: {}".format(filename, text)
123
+
124
def is_Yes(response) -> bool:
    """Return True when ``response`` is closer to "Yes" than to "No".

    Closeness is measured with ``Levenshtein.ratio``; a tie counts as False.
    """
    yes_score, no_score = (
        Levenshtein.ratio(label, response) for label in ("Yes", "No")
    )
    return no_score < yes_score
129
+
130
def contains_py_filename(filename):
    """Return True when the substring ``.py`` occurs anywhere in ``filename``."""
    py_marker = '.py'
    return py_marker in filename
132
+
133
def contains_sorry(response) -> bool:
    """Return True when ``response`` contains the case-sensitive word "Sorry"."""
    apology = "Sorry"
    return apology in response
135
+
136
# Integer codes for the query categories; ``default_num`` is the fallback
# code and has no label of its own in ``query_classes``.
(
    general_greeting_num,
    general_question_num,
    machine_learning_num,
    python_code_num,
    obnoxious_num,
    progress_num,
    debug_num,
    default_num,
) = range(8)

# Bracketed label -> category code. Progress and score requests share the
# same code.
query_classes = dict([
    ('[General greeting]', general_greeting_num),
    ('[General question]', general_question_num),
    ('[Question about Machine Learning]', machine_learning_num),
    ('[Question about Python programming]', python_code_num),
    ('[Obnoxious statement]', obnoxious_num),
    ('[Request for Progress]', progress_num),
    ('[Request for Score]', progress_num),
    ('[Debug statement]', debug_num),
])

# Comma-separated list of all labels, in insertion order.
query_classes_text = ", ".join(query_classes)
155
+
156
class Classify_Agent:
    """Classifies a user query into one of the ``query_classes`` categories."""

    def __init__(self, openai_client) -> None:
        # Client handed to ``get_completion`` for every classification call.
        self.openai_client = openai_client

    def classify_query(self, query):
        """Return the integer category code for ``query``.

        The model is asked to answer with one of the bracketed labels in
        ``query_classes_text``. An exact label (ignoring surrounding
        whitespace) maps to its code. Otherwise, if exactly one known
        label occurs inside the reply, that label is used. Any other
        reply, including an empty one, yields ``default_num``.
        """
        prompt = f"Please classify this query in angle brackets <{query}> as one of the following in square brackets only: {query_classes_text}."
        classification_response = get_completion(self.openai_client, prompt)

        if not classification_response:
            return default_num

        label = classification_response.strip()
        if label in query_classes:
            return query_classes[label]

        # Tolerate replies such as "[General greeting]." or a short sentence
        # around the label, but only when the label is unambiguous.
        found = [known for known in query_classes if known in label]
        if len(found) == 1:
            return query_classes[found[0]]

        return default_num
173
+
174
class Relevant_Documents_Agent:
    """Asks the model whether retrieved text is relevant to the conversation."""

    def __init__(self, openai_client) -> None:
        # Client handed to ``get_completion`` for the relevance check.
        self.client = openai_client

    def is_relevant(self, matches_text, user_query_plus_conversation) -> bool:
        """Return True when the model's reply is judged a "Yes" by ``is_Yes``.

        ``matches_text`` is embedded in angle brackets and
        ``user_query_plus_conversation`` in double square brackets in the
        prompt sent through ``get_completion``.
        """
        prompt = (
            f"Please confirm that the text in angle brackets: <{matches_text}>, "
            f"is relevant to the text in double square brackets: [[{user_query_plus_conversation}]]. "
            "Return Yes or No"
        )
        answer = get_completion(self.client, prompt)
        return is_Yes(answer)
186
+
187
class OpenAI_Agent:
    """Holds a model name and the OpenAI key read from a key file."""

    def __init__(self, model="gpt-3.5-turbo", key_filename="/content/gdrive/MyDrive/LLM_Winter2024/open_ai_key.txt"):
        """Read the first line of ``key_filename`` into the key attributes.

        ``OPENAI_KEY`` receives the stripped line and ``OPEN_AI_API_KEY``
        the raw line. Both stay empty strings when the file has no lines.
        ``openai_client`` is left as ``None``.
        """
        self.model = model
        self.open_ai_key_file = key_filename
        self.OPENAI_KEY = ""
        self.OPEN_AI_API_KEY = ""
        self.openai_client = None

        with open(self.open_ai_key_file, "r") as key_file:
            first_line = next(key_file, None)

        if first_line is not None:
            self.OPENAI_KEY = first_line.strip()
            self.OPEN_AI_API_KEY = first_line
200
+
201
class Pinecone_Agent:
    """Connects to Pinecone and exposes the two indexes used by the app."""

    def __init__(self, key_filename="pc_api_key"):
        """Read the Pinecone key from ``key_filename`` and open the indexes.

        Args:
            key_filename: Path of a text file whose first line is the
                Pinecone API key.

        Raises:
            OSError: If the key file cannot be opened.
        """
        self.pc_api_key_file = key_filename
        self.PC_KEY = ""
        self.PC_API_KEY = ""

        # The original opened ``self.open_ai_key_file``, an attribute this
        # class never sets, so construction failed with AttributeError.
        with open(self.pc_api_key_file, "r") as f:
            first_line = f.readline()

        if first_line:
            self.PC_KEY = first_line.strip()
            # Raw line kept under its original name for compatibility.
            self.PC_API_KEY = first_line

        # Use the stripped key: the raw line can end in a newline, which is
        # not part of the API key.
        self.pc = Pinecone(api_key=self.PC_KEY)

        self.ml_namespace = "ns-600"
        self.ml_index = self.pc.Index("index-600")

        self.python_namespace = "ns-python-files"
        self.python_index = self.pc.Index("index-python-files")