Spaces:

long1104
/

chatbot

Running

App Files Files Community

chatbot / setup_code.py

long1104

Update setup_code.py

3c07e5f verified 6 months ago

raw

history blame

7.18 kB


	import io
	import os
	import warnings
	import numpy as np
	import time
	from matplotlib import pyplot as plt
	import math
	from IPython.display import display
	from PIL import Image, ImageDraw
	import getpass
	from transformers import AutoTokenizer, AutoModel
	import langchain
	from langchain_openai import OpenAIEmbeddings
	from langchain.vectorstores import Pinecone
	from pinecone import Pinecone, ServerlessSpec
	from tqdm.notebook import tqdm
	import openai
	from openai import OpenAI
	import string
	import pandas as pd
	import urllib.request
	from io import BytesIO
	import pillow_heif
	from itertools import islice
	from sklearn.metrics.pairwise import cosine_similarity
	import gc
	import ast
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from sentence_transformers import SentenceTransformer
	import streamlit as st
	import re
	import Levenshtein
	from tabulate import tabulate
	#from stability_sdk import client
	#import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation


	open_ai_key_file = "open_ai_key.txt" # Your OPEN AI Key in this file

	# Only the first line of the key file is used; any further lines are ignored.
	with open(open_ai_key_file, "r") as f:
	    line = f.readline()

	if not line:
	    # A completely empty file used to surface later as a NameError on
	    # OPENAI_KEY; fail here with a message that names the actual problem.
	    raise ValueError(
	        f"{open_ai_key_file} is empty; expected the OpenAI API key on its first line"
	    )

	OPENAI_KEY = line.strip()
	# Raw first line, deliberately left unstripped as in the original.
	OPEN_AI_API_KEY = line

	# GETTING OpenAI and Pinecone api key
	openai.api_key = OPENAI_KEY

	# SECURITY(review): a Pinecone API key is committed in source. Prefer setting the
	# PINECONE_API_KEY environment variable (and rotating the committed key); the
	# literal is kept only as a fallback so existing deployments keep working.
	pc_apikey = os.environ.get("PINECONE_API_KEY", "959aded5-f2fe-4f9e-966c-3c7bd5907202")

	# Single shared client (the original built this twice with identical arguments).
	openai_client = OpenAI(api_key=openai.api_key)


	# Function to get the embeddings of the text using OpenAI text-embedding-ada-002 model
	def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
	    """Return the embedding of `text` produced by `openai_client`.

	    Newlines in `text` are replaced by spaces, the text is sent as a
	    one-element input list, and the embedding of that single entry is
	    returned.
	    """
	    single_line = " ".join(text.split("\n"))
	    response = openai_client.embeddings.create(input=[single_line], model=model)
	    first_entry = response.data[0]
	    return first_entry.embedding

	def display_image_grid(image_caption_tuples):
	    """Render (image, caption) pairs as a matplotlib grid five columns wide.

	    Each pair's first element is either a string, which is opened with
	    `Image.open`, or any other object, which is passed to `plt.imshow`
	    unchanged. The caption is used as the subplot title.
	    """
	    grid_columns = 5
	    grid_rows = math.ceil(len(image_caption_tuples) / grid_columns)

	    # Fixed width of 20; height grows by 4 per row of images.
	    plt.figure(figsize=(20, grid_rows * 4))

	    for slot, (source, caption) in enumerate(image_caption_tuples, start=1):
	        # Strings are treated as file paths; anything else is used as-is.
	        picture = Image.open(source) if isinstance(source, str) else source

	        plt.subplot(grid_rows, grid_columns, slot)
	        plt.imshow(picture)
	        plt.title(caption, fontsize=10)  # caption shown as the title
	        plt.axis('off')  # no axes around the image

	    plt.tight_layout()
	    plt.show()

	def get_completion(client, prompt, model="gpt-3.5-turbo"):
	    """Send `prompt` as a single user message and return the reply text.

	    Args:
	        client: OpenAI client used for the request. When None, the
	            module-level `openai_client` is used instead.
	        prompt: Text placed in the content of the one user message.
	        model: Chat model name passed to the completion call.

	    Returns:
	        The `message.content` of the first choice in the response.
	    """
	    # The original ignored `client` and always used the module-level
	    # `openai_client`; honour the argument, keeping the global only as a
	    # fallback for callers that pass None.
	    active_client = client if client is not None else openai_client
	    messages = [{"role": "user", "content": prompt}]

	    # Streamlit spinner is shown for the duration of the request.
	    with st.spinner("Generating ..."):
	        response = active_client.chat.completions.create(
	            model=model,
	            messages=messages,
	        )
	    return response.choices[0].message.content

	def query_pinecone_vector_store(index, query_embeddn, top_k=5):
	    """Query `index` with the vector `query_embeddn` and return the raw result.

	    The namespace is obtained from `get_namespace(index)`; the query asks
	    for `top_k` results with both values and metadata included.
	    """
	    query_args = {
	        "namespace": get_namespace(index),
	        "top_k": top_k,
	        "vector": query_embeddn,
	        "include_values": True,
	        "include_metadata": True,
	    }
	    return index.query(**query_args)

	def get_top_k_text(matches, top_k=5):
	    """Join the metadata text of the leading matches into one string.

	    Args:
	        matches: Query result supporting `.get('matches')`, where each match
	            exposes `['metadata']['text']`.
	        top_k: Maximum number of matches to include (default 5, the number
	            the original always read).

	    Returns:
	        The texts of at most `top_k` matches joined by single spaces; an
	        empty string when there are no matches.
	    """
	    # The original indexed positions 0..4 unconditionally and raised
	    # IndexError whenever fewer than five matches came back.
	    hits = matches.get('matches') or []
	    limit = max(top_k, 0)
	    return ' '.join(hit['metadata']['text'] for hit in hits[:limit])

	def get_top_filename(matches):
	    """Format the first match as "[filename]: text".

	    Reads the `filename` and `text` metadata fields of the first entry in
	    `matches.get('matches')`.
	    """
	    best_metadata = matches.get('matches')[0]['metadata']
	    source_name = best_metadata['filename']
	    snippet = best_metadata['text']
	    return f"[{source_name}]: {snippet}"

	def is_Yes(response) -> bool:
	    """Return True when `response` is more similar to "Yes" than to "No".

	    Similarity is measured with `Levenshtein.ratio`; a tie counts as not-yes.
	    """
	    yes_score, no_score = (
	        Levenshtein.ratio(candidate, response) for candidate in ("Yes", "No")
	    )
	    return no_score < yes_score

	def contains_py_filename(filename):
	    """Report whether '.py' occurs anywhere in `filename`.

	    This is a plain containment check, so names such as 'x.pyc' also match.
	    """
	    has_py_marker = '.py' in filename
	    return has_py_marker

	def contains_sorry(response) -> bool:
	    """Report whether the exact, case-sensitive text "Sorry" occurs in `response`."""
	    found_apology = "Sorry" in response
	    return found_apology

	# Integer ids for the query categories, numbered 0..7 in this order.
	(
	    general_greeting_num,
	    general_question_num,
	    machine_learning_num,
	    python_code_num,
	    obnoxious_num,
	    progress_num,
	    debug_num,
	    default_num,
	) = range(8)

	# Bracketed label -> category id. The two "Request for ..." labels share
	# one id, and default_num has no label of its own.
	query_classes = {
	    '[General greeting]': general_greeting_num,
	    '[General question]': general_question_num,
	    '[Question about Machine Learning]': machine_learning_num,
	    '[Question about Python programming]': python_code_num,
	    '[Obnoxious statement]': obnoxious_num,
	    '[Request for Progress]': progress_num,
	    '[Request for Score]': progress_num,
	    '[Debug statement]': debug_num,
	}

	# All labels, comma separated, in dictionary order.
	query_classes_text = ", ".join(query_classes)

	class Classify_Agent:
	    """Maps a user query to one of the category ids in `query_classes`."""

	    def __init__(self, openai_client) -> None:
	        # Client handed to get_completion for every classification request.
	        self.openai_client = openai_client

	    def classify_query(self, query):
	        """Ask the model to label `query` and return the matching category id.

	        Args:
	            query: The user text to classify.

	        Returns:
	            The id from `query_classes` whose label equals the model's reply
	            (ignoring surrounding whitespace), or `default_num` when the
	            reply is missing or is not one of the known labels.
	        """
	        prompt = f"Please classify this query in angle brackets <{query}> as one of the following in square brackets only: {query_classes_text}."
	        classification_response = get_completion(self.openai_client, prompt)

	        if classification_response is None:
	            return default_num

	        # Surrounding whitespace/newlines in the reply would otherwise defeat
	        # the exact-label lookup and silently fall through to the default.
	        return query_classes.get(classification_response.strip(), default_num)

	class Relevant_Documents_Agent:
	    """Decides by majority vote whether retrieved text is relevant to a query."""

	    def __init__(self, openai_client) -> None:
	        # Client passed to get_completion for each vote.
	        self.client = openai_client

	    def is_relevant(self, matches_text, user_query_plus_conversation) -> bool:
	        """Ask the model three times and return True on at least two "yes" votes.

	        The same prompt is sent for each vote, every reply is scored with
	        `is_Yes`, and the number of yes votes is written to the Streamlit
	        page before the result is returned.
	        """
	        prompt = f"Please confirm that the text in angle brackets: <{matches_text}>, is relevant to the text in double square brackets: [[{user_query_plus_conversation}]]. Return Yes or No"

	        yes_votes = sum(
	            int(is_Yes(get_completion(self.client, prompt))) for _ in range(3)
	        )

	        st.write(yes_votes)
	        return yes_votes >= 2

	class OpenAI_Agent:
	    """Holds a model name and the OpenAI key read from a key file."""

	    def __init__(self, model="gpt-3.5-turbo", key_filename="open_ai_key.txt"):
	        """Read the first line of `key_filename` and store it on the instance.

	        `OPENAI_KEY` receives the stripped line and `OPEN_AI_API_KEY` the raw
	        line; both stay empty strings when the file has no lines.
	        `openai_client` is initialised to None and not set here.
	        """
	        self.model = model
	        self.open_ai_key_file = key_filename
	        self.OPENAI_KEY = ""
	        self.OPEN_AI_API_KEY = ""
	        self.openai_client = None

	        with open(self.open_ai_key_file, "r") as key_file:
	            first_line = key_file.readline()

	        # readline() yields "" for an empty file, in which case the
	        # empty-string defaults above are left untouched.
	        if first_line:
	            self.OPENAI_KEY = first_line.strip()
	            self.OPEN_AI_API_KEY = first_line

	class Pinecone_Agent:
	    """Connects to Pinecone and exposes the two indexes used by the chatbot."""

	    def __init__(self, key_filename="pc_api_key"):
	        """Read the Pinecone key from `key_filename` and open both indexes.

	        Args:
	            key_filename: Path of a file whose first line is the API key.

	        Attributes set:
	            PC_KEY: first line of the file, stripped.
	            PC_API_KEY: first line of the file, unmodified.
	            pc: the Pinecone client.
	            ml_namespace / ml_index: "ns-600" and the "index-600" index.
	            python_namespace / python_index: "ns-python-files" and the
	                "index-python-files" index.
	        """
	        self.pc_api_key_file = key_filename
	        self.PC_KEY = ""
	        self.PC_API_KEY = ""

	        # The original opened self.open_ai_key_file, an attribute this class
	        # never defines, so construction always failed with AttributeError.
	        with open(self.pc_api_key_file, "r") as f:
	            first_line = f.readline()

	        # An empty file leaves both attributes as empty strings.
	        self.PC_KEY = first_line.strip()
	        self.PC_API_KEY = first_line

	        # Use the stripped key: the raw line still carries its trailing
	        # newline, which is not part of the key.
	        self.pc = Pinecone(api_key=self.PC_KEY)

	        self.ml_namespace = "ns-600"
	        self.ml_index = self.pc.Index("index-600")

	        self.python_namespace = "ns-python-files"
	        self.python_index = self.pc.Index("index-python-files")