Spaces:
Running
Running
Upload setup_code.py
Browse files- setup_code.py +219 -0
setup_code.py
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import io
|
3 |
+
import os
|
4 |
+
import warnings
|
5 |
+
import numpy as np
|
6 |
+
import time
|
7 |
+
from matplotlib import pyplot as plt
|
8 |
+
import math
|
9 |
+
from IPython.display import display
|
10 |
+
from PIL import Image, ImageDraw
|
11 |
+
import getpass
|
12 |
+
from transformers import AutoTokenizer, AutoModel
|
13 |
+
import langchain
|
14 |
+
from langchain_openai import OpenAIEmbeddings
|
15 |
+
from langchain.vectorstores import Pinecone
|
16 |
+
from pinecone import Pinecone, ServerlessSpec
|
17 |
+
from tqdm.notebook import tqdm
|
18 |
+
import openai
|
19 |
+
from openai import OpenAI
|
20 |
+
import string
|
21 |
+
import pandas as pd
|
22 |
+
import urllib.request
|
23 |
+
from io import BytesIO
|
24 |
+
import pillow_heif
|
25 |
+
from itertools import islice
|
26 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
27 |
+
import gc
|
28 |
+
import ast
|
29 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
30 |
+
from sentence_transformers import SentenceTransformer
|
31 |
+
import streamlit as st
|
32 |
+
import re
|
33 |
+
import Levenshtein
|
34 |
+
from tabulate import tabulate
|
35 |
+
#from stability_sdk import client
|
36 |
+
#import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
|
37 |
+
|
38 |
+
|
39 |
+
open_ai_key_file = "open_ai_key.txt"  # Your OPEN AI Key in this file

# Only the first line of the key file is used.
with open(open_ai_key_file, "r") as f:
    _first_line = f.readline()

# An empty file used to surface later as a NameError on OPENAI_KEY;
# fail here with a message that names the actual problem instead.
if not _first_line:
    raise ValueError(f"OpenAI key file {open_ai_key_file!r} is empty")

OPENAI_KEY = _first_line.strip()
# Raw first line (may still end with a newline); kept unchanged for any
# existing code that reads this name.
OPEN_AI_API_KEY = _first_line

# Configure the module-level OpenAI key and build the shared client once
# (this setup was previously repeated twice with identical values).
openai.api_key = OPENAI_KEY
openai_client = OpenAI(api_key=openai.api_key)

# SECURITY NOTE(review): this Pinecone API key is hard-coded in source.
# It should be rotated and loaded from a file or environment variable.
pc_apikey = "959aded5-f2fe-4f9e-966c-3c7bd5907202"
|
57 |
+
|
58 |
+
|
59 |
+
# Embed a piece of text with an OpenAI embedding model
# (text-embedding-ada-002 unless another model is given).
def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
    """Return the embedding of ``text`` produced by ``model``.

    Newlines in ``text`` are replaced with spaces before the request is made.

    Args:
        openai_client: Client object exposing ``embeddings.create``.
        text: The string to embed.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    single_line = text.replace("\n", " ")
    response = openai_client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding
|
63 |
+
|
64 |
+
def display_image_grid(image_caption_tuples):
    """Show captioned images in a five-column matplotlib grid.

    Args:
        image_caption_tuples: Sequence of ``(image, caption)`` pairs. When the
            first element is a string it is opened with ``Image.open``;
            otherwise it is handed to ``plt.imshow`` unchanged.
    """
    columns = 5
    total = len(image_caption_tuples)
    rows = math.ceil(total / columns)

    # Four inches of figure height per grid row.
    plt.figure(figsize=(20, rows * 4))

    for position, (image_source, caption) in enumerate(image_caption_tuples, start=1):
        # Strings are treated as file paths; anything else is assumed to
        # already be an image object.
        image = Image.open(image_source) if isinstance(image_source, str) else image_source

        plt.subplot(rows, columns, position)
        plt.imshow(image)
        plt.title(caption, fontsize=10)  # caption shown as the subplot title
        plt.axis('off')

    plt.tight_layout()
    plt.show()
|
90 |
+
|
91 |
+
def get_completion(client, prompt, model="gpt-3.5-turbo"):
    """Send ``prompt`` as a single user message and return the reply text.

    A Streamlit spinner is displayed while the request is in flight.

    Args:
        client: OpenAI client used for the request. If ``None``, the
            module-level ``openai_client`` is used instead.
        prompt: Text sent as the content of the user message.
        model: Chat model name.

    Returns:
        The message content of the first choice in the response.
    """
    # The passed-in client used to be ignored in favour of the module global;
    # honour it, and fall back to the global only when no client is given.
    active_client = client if client is not None else openai_client
    message = {"role": "user", "content": prompt}
    with st.spinner("Generating ..."):
        response = active_client.chat.completions.create(
            model=model,
            messages=[message],
        )
    return response.choices[0].message.content
|
99 |
+
|
100 |
+
def query_pinecone_vector_store(index, query_embeddn, top_k=5):
    """Query ``index`` with an embedding and return the raw query result.

    The namespace is obtained from ``get_namespace(index)``. Both vector
    values and metadata are requested in the response.

    Args:
        index: Index object exposing a ``query`` method.
        query_embeddn: The query vector.
        top_k: Number of matches to request.

    Returns:
        Whatever ``index.query`` returns.
    """
    query_kwargs = {
        "namespace": get_namespace(index),
        "top_k": top_k,
        "vector": query_embeddn,
        "include_values": True,
        "include_metadata": True,
    }
    return index.query(**query_kwargs)
|
110 |
+
|
111 |
+
def get_top_k_text(matches, top_k=5):
    """Join the ``text`` metadata of the leading matches with single spaces.

    Args:
        matches: Mapping with a ``'matches'`` entry holding a list of items,
            each of which has ``['metadata']['text']``.
        top_k: Maximum number of matches to include (default 5).

    Returns:
        The space-joined texts of at most ``top_k`` matches; an empty string
        when there are no matches.
    """
    # Slice instead of indexing a fixed range so that fewer than ``top_k``
    # results (or none at all) no longer raise IndexError.
    results = matches.get('matches') or []
    return ' '.join(match['metadata']['text'] for match in results[:top_k])
|
118 |
+
|
119 |
+
def get_top_filename(matches):
    """Format the first match as ``"[<filename>]: <text>"``.

    Args:
        matches: Mapping with a ``'matches'`` entry whose first item has
            ``['metadata']['filename']`` and ``['metadata']['text']``.

    Returns:
        The formatted string for the first match.
    """
    best = matches.get('matches')[0]
    metadata = best['metadata']
    filename, text = metadata['filename'], metadata['text']
    return f"[{filename}]: {text}"
|
123 |
+
|
124 |
+
def is_Yes(response) -> bool:
    """Return True when ``response`` is closer to "Yes" than to "No".

    Closeness is measured with ``Levenshtein.ratio``; a tie counts as False.
    """
    yes_score, no_score = (
        Levenshtein.ratio(label, response) for label in ("Yes", "No")
    )
    return no_score < yes_score
|
129 |
+
|
130 |
+
def contains_py_filename(filename):
    """Return True when ``'.py'`` occurs anywhere in ``filename``."""
    has_py_marker = '.py' in filename
    return has_py_marker
|
132 |
+
|
133 |
+
def contains_sorry(response) -> bool:
    """Return True when the case-sensitive substring ``"Sorry"`` is in ``response``."""
    found_apology = "Sorry" in response
    return found_apology
|
135 |
+
|
136 |
+
# Integer codes for the query categories (0..6), plus ``default_num`` (7),
# which is not mapped to any label in ``query_classes`` below.
(
    general_greeting_num,
    general_question_num,
    machine_learning_num,
    python_code_num,
    obnoxious_num,
    progress_num,
    debug_num,
    default_num,
) = range(8)

# Bracketed label -> category code. Both the progress and the score request
# labels map to ``progress_num``.
query_classes = dict([
    ('[General greeting]', general_greeting_num),
    ('[General question]', general_question_num),
    ('[Question about Machine Learning]', machine_learning_num),
    ('[Question about Python programming]', python_code_num),
    ('[Obnoxious statement]', obnoxious_num),
    ('[Request for Progress]', progress_num),
    ('[Request for Score]', progress_num),
    ('[Debug statement]', debug_num),
])

# Comma-separated list of all labels, in insertion order.
query_classes_text = ", ".join(query_classes)
|
155 |
+
|
156 |
+
class Classify_Agent:
    """Classifies a user query into one of the ``query_classes`` categories."""

    def __init__(self, openai_client) -> None:
        """Store the OpenAI client used for classification requests."""
        self.openai_client = openai_client

    def classify_query(self, query):
        """Return the category code for ``query``.

        The model is asked to answer with one of the bracketed labels in
        ``query_classes_text``. The reply is looked up in ``query_classes``.

        Args:
            query: The user's query text.

        Returns:
            The matching category code, or ``default_num`` when the reply is
            ``None`` or is not one of the known labels.
        """
        prompt = f"Please classify this query in angle brackets <{query}> as one of the following in square brackets only: {query_classes_text}."
        classification_response = get_completion(self.openai_client, prompt)

        if classification_response is None:
            return default_num

        # Strip surrounding whitespace so an otherwise exact label that comes
        # back with a trailing newline or space still matches.
        return query_classes.get(classification_response.strip(), default_num)
|
173 |
+
|
174 |
+
class Relevant_Documents_Agent:
    """Asks the model whether retrieved text is relevant to the conversation."""

    def __init__(self, openai_client) -> None:
        """Keep a reference to the OpenAI client used for relevance checks."""
        self.client = openai_client

    def is_relevant(self, matches_text, user_query_plus_conversation) -> bool:
        """Return True when the model's reply reads as "Yes".

        Args:
            matches_text: Retrieved text to be judged.
            user_query_plus_conversation: The text it is judged against.

        Returns:
            The result of ``is_Yes`` applied to the model's reply.
        """
        prompt = f"Please confirm that the text in angle brackets: <{matches_text}>, is relevant to the text in double square brackets: [[{user_query_plus_conversation}]]. Return Yes or No"
        verdict = get_completion(self.client, prompt)
        return is_Yes(verdict)
|
186 |
+
|
187 |
+
class OpenAI_Agent:
    """Holds a model name and an OpenAI key read from a file."""

    def __init__(self, model="gpt-3.5-turbo", key_filename="/content/gdrive/MyDrive/LLM_Winter2024/open_ai_key.txt"):
        """Read the first line of ``key_filename`` and store it as the key.

        Args:
            model: Model name stored on the instance.
            key_filename: Path of the text file whose first line is the key.
        """
        self.model = model
        self.open_ai_key_file = key_filename
        # No client is created here; the attribute is left as None.
        self.openai_client = None

        # Only the first line matters; an empty file yields empty strings.
        with open(self.open_ai_key_file, "r") as key_file:
            first_line = key_file.readline()

        self.OPENAI_KEY = first_line.strip()  # key without surrounding whitespace
        self.OPEN_AI_API_KEY = first_line     # raw line, newline included if present
|
200 |
+
|
201 |
+
class Pinecone_Agent:
    """Creates a Pinecone client from a key file and opens two indexes."""

    def __init__(self, key_filename="pc_api_key"):
        """Read the Pinecone key and connect to the ML and Python indexes.

        Args:
            key_filename: Path of the text file whose first line is the
                Pinecone API key.
        """
        self.pc_api_key_file = key_filename
        self.PC_KEY = ""
        self.PC_API_KEY = ""

        # Read from the Pinecone key file. This previously opened
        # ``self.open_ai_key_file``, an attribute this class never sets,
        # so construction always failed with AttributeError.
        with open(self.pc_api_key_file, "r") as f:
            first_line = f.readline()

        self.PC_KEY = first_line.strip()  # key without surrounding whitespace
        self.PC_API_KEY = first_line      # raw line, newline included if present

        # Pass the stripped key: the raw line may end with a newline, which
        # is not part of the key.
        self.pc = Pinecone(api_key=self.PC_KEY)

        # Namespace / index pair used for the machine-learning material.
        self.ml_namespace = "ns-600"
        self.ml_index = self.pc.Index("index-600")

        # Namespace / index pair used for the Python source files.
        self.python_namespace = "ns-python-files"
        self.python_index = self.pc.Index("index-python-files")
|