Spaces:
Sleeping
Sleeping
FridayMaster
committed on
Commit
•
0972a36
1
Parent(s):
d62d124
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
import gradio as gr
|
3 |
import nltk
|
4 |
from nltk.tokenize import sent_tokenize
|
@@ -10,15 +9,15 @@ import openai
|
|
10 |
|
11 |
import os

# Set up the OpenAI API key from the environment instead of hard-coding it.
# SECURITY NOTE(review): a live-looking secret key ('sk-proj-…') was committed
# here in a public repository — it must be revoked and rotated immediately.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
|
|
|
13 |
# Download NLTK data
|
14 |
nltk.download('punkt')
|
15 |
-
nltk.download('punkt_tab')
|
16 |
|
17 |
# Load the tokenizer and model
|
18 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
19 |
model = AutoModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
20 |
|
21 |
-
manual_path="ubuntu_manual.txt"
|
22 |
|
23 |
# Load the Ubuntu manual from a .txt file
|
24 |
with open(manual_path, "r", encoding="utf-8") as file:
|
@@ -64,17 +63,22 @@ dimension = chunk_embeddings_np.shape[1]
|
|
64 |
index = faiss.IndexFlatL2(dimension)
|
65 |
index.add(chunk_embeddings_np)
|
66 |
|
67 |
-
# Function to retrieve relevant chunks for a user query
|
68 |
def retrieve_chunks(query, k=5):
|
69 |
query_embedding = embed_text([query])
|
70 |
distances, indices = index.search(query_embedding, k=k)
|
71 |
valid_indices = [i for i in indices[0] if i < len(manual_chunks)]
|
72 |
relevant_chunks = [manual_chunks[i] for i in valid_indices]
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
# Function to perform RAG: Retrieve chunks and generate a response using GPT-3.5
|
76 |
def rag_response_gpt3_5(query, k=3, max_tokens=150):
|
77 |
-
relevant_chunks = retrieve_chunks(query, k=k)
|
78 |
if not relevant_chunks:
|
79 |
return "Sorry, I couldn't find relevant information."
|
80 |
|
@@ -101,10 +105,17 @@ def rag_response_gpt3_5(query, k=3, max_tokens=150):
|
|
101 |
return response.choices[0].message['content'].strip()
|
102 |
|
103 |
# Chat history to maintain conversation context
|
104 |
-
history = []
|
105 |
-
|
106 |
-
# Define Gradio interface function with chat history
|
107 |
def chatbot(query, history):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
response = rag_response_gpt3_5(query)
|
109 |
history.append((query, response))
|
110 |
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import nltk
|
3 |
from nltk.tokenize import sent_tokenize
|
|
|
9 |
|
10 |
import os

# Set up the OpenAI API key from the environment instead of hard-coding it.
# SECURITY NOTE(review): a live-looking secret key ('sk-proj-…') was committed
# here in a public repository — it must be revoked and rotated immediately.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
|
12 |
+
|
13 |
# Download NLTK data
|
14 |
nltk.download('punkt')
|
|
|
15 |
|
16 |
# Load the tokenizer and model
|
17 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
18 |
model = AutoModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
19 |
|
20 |
+
manual_path = "ubuntu_manual.txt"
|
21 |
|
22 |
# Load the Ubuntu manual from a .txt file
|
23 |
with open(manual_path, "r", encoding="utf-8") as file:
|
|
|
63 |
index = faiss.IndexFlatL2(dimension)
|
64 |
index.add(chunk_embeddings_np)
|
65 |
|
66 |
+
# Function to retrieve relevant chunks for a user query and print indices and distances
def retrieve_chunks(query, k=5):
    """Return the top-k manual chunks most similar to *query*.

    Embeds the query, searches the FAISS index, and drops any returned
    index that falls outside ``manual_chunks`` (FAISS can return padding
    ids when fewer than k vectors match). Each kept (index, distance)
    pair is printed for debugging.

    Returns:
        tuple: (relevant_chunks, indices[0], distances[0]) — the filtered
        chunk texts plus the raw FAISS index and distance arrays, so the
        caller's unpacking of three values keeps working.
    """
    query_embedding = embed_text([query])
    distances, indices = index.search(query_embedding, k=k)

    valid_indices = [i for i in indices[0] if i < len(manual_chunks)]
    relevant_chunks = [manual_chunks[i] for i in valid_indices]

    # BUG FIX: the previous loop did `for i, idx in enumerate(valid_indices):
    # print(… distances[0][i])`. There `i` is a position in the *filtered*
    # list but was used to index the *unfiltered* distances array, so the
    # printed distance belonged to the wrong index whenever an invalid id
    # had been dropped. Iterate the raw results instead, so each index is
    # paired with its own distance.
    for pos, idx in enumerate(indices[0]):
        if idx < len(manual_chunks):
            print(f"Index: {idx}, Distance: {distances[0][pos]}")

    return relevant_chunks, indices[0], distances[0]
|
78 |
|
79 |
# Function to perform RAG: Retrieve chunks and generate a response using GPT-3.5
|
80 |
def rag_response_gpt3_5(query, k=3, max_tokens=150):
|
81 |
+
relevant_chunks, indices, distances = retrieve_chunks(query, k=k)
|
82 |
if not relevant_chunks:
|
83 |
return "Sorry, I couldn't find relevant information."
|
84 |
|
|
|
105 |
return response.choices[0].message['content'].strip()
|
106 |
|
107 |
# Chat history to maintain conversation context
|
|
|
|
|
|
|
108 |
def chatbot(query, history):
|
109 |
+
if history is None:
|
110 |
+
history = []
|
111 |
+
|
112 |
+
# Retrieve relevant chunks along with their indices and distances
|
113 |
+
relevant_chunks, indices, distances = retrieve_chunks(query)
|
114 |
+
|
115 |
+
# Print the indices and distances of the retrieved chunks
|
116 |
+
print(f"Retrieved Indices: {indices}")
|
117 |
+
print(f"Retrieved Distances: {distances}")
|
118 |
+
|
119 |
response = rag_response_gpt3_5(query)
|
120 |
history.append((query, response))
|
121 |
|