Spaces:

long1104
/

chatbot

Running

App Files Files Community

long1104 committed on Mar 12

Commit

9587bef

•

1 Parent(s): a8adbc9

Upload setup_code.py

Browse files

Files changed (1) hide show

setup_code.py +219 -0

setup_code.py ADDED Viewed

	@@ -0,0 +1,219 @@

+import io
+import os
+import warnings
+import numpy as np
+import time
+from matplotlib import pyplot as plt
+import math
+from IPython.display import display
+from PIL import Image, ImageDraw
+import getpass
+from transformers import AutoTokenizer, AutoModel
+import langchain
+from langchain_openai import OpenAIEmbeddings
+from langchain.vectorstores import Pinecone
+from pinecone import Pinecone, ServerlessSpec
+from tqdm.notebook import tqdm
+import openai
+from openai import OpenAI
+import string
+import pandas as pd
+import urllib.request
+from io import BytesIO
+import pillow_heif
+from itertools import islice
+from sklearn.metrics.pairwise import cosine_similarity
+import gc
+import ast
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from sentence_transformers import SentenceTransformer
+import streamlit as st
+import re
+import Levenshtein
+from tabulate import tabulate
+#from stability_sdk import client
+#import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
# --- API credentials and the shared OpenAI client ---------------------------
# The OpenAI API key is expected on the first line of this file.
open_ai_key_file = "open_ai_key.txt"

with open(open_ai_key_file, "r") as f:
    # Raw first line, trailing newline included; later lines are ignored.
    OPEN_AI_API_KEY = f.readline()
if not OPEN_AI_API_KEY:
    # Fail with a clear message instead of an unrelated NameError further down.
    raise ValueError(
        f"{open_ai_key_file} is empty; expected the OpenAI API key on its first line"
    )
OPENAI_KEY = OPEN_AI_API_KEY.strip()

# Configure the openai module and build the module-level client exactly once.
openai.api_key = OPENAI_KEY
openai_client = OpenAI(api_key=openai.api_key)

# SECURITY: this Pinecone key is committed in source control. Rotate it and
# supply the replacement through the PINECONE_API_KEY environment variable.
# The literal is kept only as a backward-compatible fallback.
pc_apikey = os.environ.get("PINECONE_API_KEY", "959aded5-f2fe-4f9e-966c-3c7bd5907202")
# Embed a piece of text with an OpenAI embedding model (text-embedding-ada-002 by default)
def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
    """Return the embedding of *text* produced by *model*.

    Newlines in *text* are replaced with spaces before the request is sent.
    The text is submitted as a one-element batch and the embedding of that
    single element is returned.
    """
    single_line_text = text.replace("\n", " ")
    response = openai_client.embeddings.create(input=[single_line_text], model=model)
    first_item = response.data[0]
    return first_item.embedding
def display_image_grid(image_caption_tuples, columns=5):
    """Show captioned images in a grid of matplotlib subplots.

    Args:
        image_caption_tuples: Sequence of ``(image, caption)`` pairs. ``image``
            is either a filesystem path (``str`` or ``os.PathLike``), which is
            opened with ``PIL.Image.open``, or an object that is passed to
            ``plt.imshow`` unchanged.
        columns: Number of subplot columns in the grid (default 5).

    Returns:
        None. With empty input nothing is drawn.

    Raises:
        ValueError: If ``columns`` is smaller than 1.
    """
    if columns < 1:
        raise ValueError(f"columns must be >= 1, got {columns}")

    n = len(image_caption_tuples)
    if n == 0:
        # Zero images means zero rows; skip creating a degenerate figure.
        return

    rows = math.ceil(n / columns)
    # Fixed width, four height units per grid row.
    plt.figure(figsize=(20, rows * 4))

    for i, (image_path, caption) in enumerate(image_caption_tuples, start=1):
        if isinstance(image_path, (str, os.PathLike)):
            image = Image.open(image_path)
        else:
            image = image_path  # Already an image object

        plt.subplot(rows, columns, i)
        plt.imshow(image)
        plt.title(caption, fontsize=10)  # Caption is rendered as the subplot title
        plt.axis('off')

    plt.tight_layout()
    plt.show()
def get_completion(client, prompt, model="gpt-3.5-turbo"):
    """Send *prompt* as a single user message and return the reply text.

    Args:
        client: OpenAI client used for the request. When ``None``, the
            module-level ``openai_client`` is used instead.
        prompt: Text sent as the content of the one user message.
        model: Chat model name.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    # Previously the ``client`` argument was ignored and the global client was
    # always used; honour the argument and keep the global only as a fallback.
    active_client = client if client is not None else openai_client
    messages = [{"role": "user", "content": prompt}]
    # Show a Streamlit spinner while the request is in flight.
    with st.spinner("Generating ..."):
        response = active_client.chat.completions.create(
            model=model,
            messages=messages,
        )
    return response.choices[0].message.content
def query_pinecone_vector_store(index, query_embeddn, top_k=5):
    """Query *index* with the embedding *query_embeddn* and return the raw result.

    The namespace is obtained from ``get_namespace(index)`` (defined outside
    this part of the file). Both vector values and metadata are requested for
    the ``top_k`` results.
    """
    query_args = {
        "namespace": get_namespace(index),
        "top_k": top_k,
        "vector": query_embeddn,
        "include_values": True,
        "include_metadata": True,
    }
    return index.query(**query_args)
def get_top_k_text(matches, top_k=5):
    """Join the ``metadata['text']`` of the first *top_k* matches with spaces.

    Args:
        matches: Query result exposing ``.get('matches')``, a list of entries
            that each carry ``['metadata']['text']``.
        top_k: Maximum number of leading matches to include (default 5).

    Returns:
        The texts separated by single spaces; an empty string when there are
        no matches.
    """
    # Use however many matches were returned instead of indexing a fixed five,
    # which raised IndexError on shorter result lists.
    found = matches.get('matches') or []
    return ' '.join(match['metadata']['text'] for match in found[:top_k])
def get_top_filename(matches):
    """Format the first match as ``"[<filename>]: <text>"``.

    Args:
        matches: Query result exposing ``.get('matches')``, a list of entries
            that each carry ``['metadata']['filename']`` and
            ``['metadata']['text']``.

    Returns:
        The formatted string for the first match, or an empty string when
        there are no matches (previously this raised IndexError).
    """
    found = matches.get('matches')
    if not found:
        return ""
    top_metadata = found[0]['metadata']
    return f"[{top_metadata['filename']}]: {top_metadata['text']}"
def is_Yes(response) -> bool:
    """Decide whether *response* reads more like "Yes" than like "No".

    The decision compares ``Levenshtein.ratio`` of *response* against the
    literals ``"Yes"`` and ``"No"``; a tie counts as not-yes.

    Args:
        response: Text to judge. A non-string value such as ``None`` is
            treated as not-yes instead of raising inside ``Levenshtein.ratio``.

    Returns:
        True when the similarity to "Yes" is strictly greater than the
        similarity to "No".
    """
    if not isinstance(response, str):
        return False
    similarityYes = Levenshtein.ratio("Yes", response)
    similarityNo = Levenshtein.ratio("No", response)
    return similarityYes > similarityNo
def contains_py_filename(filename):
    """Return True when the substring ``.py`` occurs anywhere in *filename*."""
    has_py_marker = '.py' in filename
    return has_py_marker
def contains_sorry(response) -> bool:
    """Return True when *response* contains the case-sensitive word ``Sorry``."""
    apology_found = "Sorry" in response
    return apology_found
# Numeric ids for the query categories, numbered consecutively from 0.
# default_num has no label of its own in query_classes.
(
    general_greeting_num,
    general_question_num,
    machine_learning_num,
    python_code_num,
    obnoxious_num,
    progress_num,
    debug_num,
    default_num,
) = range(8)

# Bracketed category label -> numeric id. The progress and score requests
# deliberately share one id.
query_classes = {
    '[General greeting]': general_greeting_num,
    '[General question]': general_question_num,
    '[Question about Machine Learning]': machine_learning_num,
    '[Question about Python programming]': python_code_num,
    '[Obnoxious statement]': obnoxious_num,
    '[Request for Progress]': progress_num,
    '[Request for Score]': progress_num,
    '[Debug statement]': debug_num,
}

# Comma-separated list of every label, in declaration order.
query_classes_text = ", ".join(query_classes)
class Classify_Agent:
    """Classifies a user query into one of the ``query_classes`` categories."""

    def __init__(self, openai_client) -> None:
        """Store the OpenAI client that is handed to ``get_completion``."""
        self.openai_client = openai_client

    def classify_query(self, query):
        """Return the numeric category id for *query*.

        The model is asked to answer with exactly one of the bracketed labels
        in ``query_classes``.

        Returns:
            The id mapped to the returned label, or ``default_num`` when the
            response is ``None`` or is not a known label.
        """
        prompt = f"Please classify this query in angle brackets <{query}> as one of the following in square brackets only: {query_classes_text}."
        classification_response = get_completion(self.openai_client, prompt)
        if classification_response is None:
            return default_num
        # Surrounding whitespace would otherwise defeat the exact-match lookup.
        return query_classes.get(classification_response.strip(), default_num)
class Relevant_Documents_Agent:
    """Asks the model whether retrieved text is relevant to the conversation."""

    def __init__(self, openai_client) -> None:
        """Keep the OpenAI client that is handed to ``get_completion``."""
        self.client = openai_client

    def is_relevant(self, matches_text, user_query_plus_conversation) -> bool:
        """Return the ``is_Yes`` verdict on the model's Yes/No relevance answer."""
        relevance_prompt = (
            f"Please confirm that the text in angle brackets: <{matches_text}>, "
            f"is relevant to the text in double square brackets: [[{user_query_plus_conversation}]]. "
            "Return Yes or No"
        )
        verdict = get_completion(self.client, relevance_prompt)
        return is_Yes(verdict)
class OpenAI_Agent:
    """Holds a chat model name and the OpenAI API key read from a key file."""

    def __init__(self, model="gpt-3.5-turbo", key_filename="/content/gdrive/MyDrive/LLM_Winter2024/open_ai_key.txt"):
        """Read the first line of *key_filename* as the API key.

        ``OPEN_AI_API_KEY`` keeps the raw first line (newline included) and
        ``OPENAI_KEY`` the stripped form; both stay ``""`` for an empty file.
        ``openai_client`` is initialised to ``None`` and not created here.
        """
        self.model = model
        self.open_ai_key_file = key_filename
        self.openai_client = None
        self.OPENAI_KEY = ""
        self.OPEN_AI_API_KEY = ""

        with open(self.open_ai_key_file, "r") as key_file:
            first_line = key_file.readline()

        # readline() yields "" only for an empty file; then the defaults stand.
        if first_line:
            self.OPEN_AI_API_KEY = first_line
            self.OPENAI_KEY = first_line.strip()
class Pinecone_Agent:
  # Reads the Pinecone API key from a file and opens the two indexes used by
  # the app: one for machine-learning material, one for Python files.
  def __init__(self, key_filename="pc_api_key"):
    """Load the Pinecone key from *key_filename* and connect to the indexes.

    ``PC_API_KEY`` keeps the raw first line of the file and ``PC_KEY`` the
    stripped form; both stay ``""`` for an empty file.
    """
    self.pc_api_key_file = key_filename
    self.PC_KEY = ""
    self.PC_API_KEY = ""
    # Read from the Pinecone key file. The previous code opened
    # self.open_ai_key_file, an attribute this class never sets, so
    # construction failed with AttributeError.
    with open(self.pc_api_key_file, "r") as f:
        first_line = f.readline()
    if first_line:
        self.PC_KEY = first_line.strip()
        self.PC_API_KEY = first_line
    # Authenticate with the stripped key; the raw line can carry a trailing
    # newline, which is not part of the key.
    self.pc = Pinecone(api_key=self.PC_KEY)
    self.ml_namespace = "ns-600"
    self.ml_index = self.pc.Index("index-600")
    self.python_namespace = "ns-python-files"
    self.python_index = self.pc.Index("index-python-files")