Spaces:
Sleeping
Sleeping
FridayMaster
committed on
Commit
•
0972a36
1
Parent(s):
d62d124
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
import gradio as gr
|
3 |
import nltk
|
4 |
from nltk.tokenize import sent_tokenize
|
@@ -10,15 +9,15 @@ import openai
|
|
10 |
|
11 |
import os

# Set up the OpenAI API key from the environment instead of hard-coding it.
# SECURITY NOTE(review): a live-looking secret key ('sk-proj-…') was committed
# here in a public repository — it must be revoked and rotated immediately.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
|
|
|
13 |
# Download NLTK data
|
14 |
nltk.download('punkt')
|
15 |
-
nltk.download('punkt_tab')
|
16 |
|
17 |
# Load the tokenizer and model
|
18 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
19 |
model = AutoModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
20 |
|
21 |
-
manual_path="ubuntu_manual.txt"
|
22 |
|
23 |
# Load the Ubuntu manual from a .txt file
|
24 |
with open(manual_path, "r", encoding="utf-8") as file:
|
@@ -64,17 +63,22 @@ dimension = chunk_embeddings_np.shape[1]
|
|
64 |
index = faiss.IndexFlatL2(dimension)
|
65 |
index.add(chunk_embeddings_np)
|
66 |
|
67 |
-
# Function to retrieve relevant chunks for a user query
|
68 |
def retrieve_chunks(query, k=5):
|
69 |
query_embedding = embed_text([query])
|
70 |
distances, indices = index.search(query_embedding, k=k)
|
71 |
valid_indices = [i for i in indices[0] if i < len(manual_chunks)]
|
72 |
relevant_chunks = [manual_chunks[i] for i in valid_indices]
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
# Function to perform RAG: Retrieve chunks and generate a response using GPT-3.5
|
76 |
def rag_response_gpt3_5(query, k=3, max_tokens=150):
|
77 |
-
relevant_chunks = retrieve_chunks(query, k=k)
|
78 |
if not relevant_chunks:
|
79 |
return "Sorry, I couldn't find relevant information."
|
80 |
|
@@ -101,10 +105,17 @@ def rag_response_gpt3_5(query, k=3, max_tokens=150):
|
|
101 |
return response.choices[0].message['content'].strip()
|
102 |
|
103 |
# Chat history to maintain conversation context
|
104 |
-
history = []
|
105 |
-
|
106 |
-
# Define Gradio interface function with chat history
|
107 |
def chatbot(query, history):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
response = rag_response_gpt3_5(query)
|
109 |
history.append((query, response))
|
110 |
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import nltk
|
3 |
from nltk.tokenize import sent_tokenize
|
|
|
9 |
|
10 |
import os

# Set up the OpenAI API key from the environment instead of hard-coding it.
# SECURITY NOTE(review): a live-looking secret key ('sk-proj-…') was committed
# here in a public repository — it must be revoked and rotated immediately.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
|
12 |
+
|
13 |
# Download NLTK data
|
14 |
nltk.download('punkt')
|
|
|
15 |
|
16 |
# Load the tokenizer and model
|
17 |
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
18 |
model = AutoModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
19 |
|
20 |
+
manual_path = "ubuntu_manual.txt"
|
21 |
|
22 |
# Load the Ubuntu manual from a .txt file
|
23 |
with open(manual_path, "r", encoding="utf-8") as file:
|
|
|
63 |
index = faiss.IndexFlatL2(dimension)
|
64 |
index.add(chunk_embeddings_np)
|
65 |
|
66 |
+
# Function to retrieve relevant chunks for a user query and print indices and distances
def retrieve_chunks(query, k=5):
    """Return the top-k manual chunks most similar to *query*.

    Embeds the query, searches the FAISS index, and drops any returned
    index that falls outside ``manual_chunks`` (FAISS can return padding
    ids when fewer than k vectors match). Each kept (index, distance)
    pair is printed for debugging.

    Returns:
        tuple: (relevant_chunks, indices[0], distances[0]) — the filtered
        chunk texts plus the raw FAISS index and distance arrays, so the
        caller's unpacking of three values keeps working.
    """
    query_embedding = embed_text([query])
    distances, indices = index.search(query_embedding, k=k)

    valid_indices = [i for i in indices[0] if i < len(manual_chunks)]
    relevant_chunks = [manual_chunks[i] for i in valid_indices]

    # BUG FIX: the previous loop did `for i, idx in enumerate(valid_indices):
    # print(… distances[0][i])`. There `i` is a position in the *filtered*
    # list but was used to index the *unfiltered* distances array, so the
    # printed distance belonged to the wrong index whenever an invalid id
    # had been dropped. Iterate the raw results instead, so each index is
    # paired with its own distance.
    for pos, idx in enumerate(indices[0]):
        if idx < len(manual_chunks):
            print(f"Index: {idx}, Distance: {distances[0][pos]}")

    return relevant_chunks, indices[0], distances[0]
|
78 |
|
79 |
# Function to perform RAG: Retrieve chunks and generate a response using GPT-3.5
|
80 |
def rag_response_gpt3_5(query, k=3, max_tokens=150):
|
81 |
+
relevant_chunks, indices, distances = retrieve_chunks(query, k=k)
|
82 |
if not relevant_chunks:
|
83 |
return "Sorry, I couldn't find relevant information."
|
84 |
|
|
|
105 |
return response.choices[0].message['content'].strip()
|
106 |
|
107 |
# Chat history to maintain conversation context
|
|
|
|
|
|
|
108 |
def chatbot(query, history):
|
109 |
+
if history is None:
|
110 |
+
history = []
|
111 |
+
|
112 |
+
# Retrieve relevant chunks along with their indices and distances
|
113 |
+
relevant_chunks, indices, distances = retrieve_chunks(query)
|
114 |
+
|
115 |
+
# Print the indices and distances of the retrieved chunks
|
116 |
+
print(f"Retrieved Indices: {indices}")
|
117 |
+
print(f"Retrieved Distances: {distances}")
|
118 |
+
|
119 |
response = rag_response_gpt3_5(query)
|
120 |
history.append((query, response))
|
121 |
|