FridayMaster committed on
Commit
0972a36
1 Parent(s): d62d124

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -9
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import gradio as gr
3
  import nltk
4
  from nltk.tokenize import sent_tokenize
@@ -10,15 +9,15 @@ import openai
10
 
11
import os

# Set up OpenAI API key.
# SECURITY: a live `sk-proj-...` key was hard-coded here and is now
# compromised — it must be revoked on the OpenAI dashboard. Never commit
# secrets; read the key from the environment instead.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Download NLTK sentence-tokenizer data used by sent_tokenize
nltk.download('punkt')
nltk.download('punkt_tab')
16
 
17
  # Load the tokenizer and model
18
  tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
19
  model = AutoModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
20
 
21
- manual_path="ubuntu_manual.txt"
22
 
23
  # Load the Ubuntu manual from a .txt file
24
  with open(manual_path, "r", encoding="utf-8") as file:
@@ -64,17 +63,22 @@ dimension = chunk_embeddings_np.shape[1]
64
  index = faiss.IndexFlatL2(dimension)
65
  index.add(chunk_embeddings_np)
66
 
67
# Function to retrieve relevant chunks for a user query
def retrieve_chunks(query, k=5):
    """Return the manual chunks nearest to *query* via FAISS L2 search.

    Parameters:
        query: free-text user query to embed and search with.
        k: number of nearest neighbours to request from the index.

    Returns:
        list of chunk strings (at most k; fewer if the index returns
        padding entries or out-of-range ids).
    """
    query_embedding = embed_text([query])
    distances, indices = index.search(query_embedding, k=k)
    # FAISS pads missing neighbours with -1; a plain `i < len(...)` guard
    # lets -1 through, and manual_chunks[-1] would silently alias the
    # last chunk. Require a non-negative, in-range index.
    valid_indices = [i for i in indices[0] if 0 <= i < len(manual_chunks)]
    relevant_chunks = [manual_chunks[i] for i in valid_indices]
    return relevant_chunks
 
 
 
 
 
74
 
75
  # Function to perform RAG: Retrieve chunks and generate a response using GPT-3.5
76
  def rag_response_gpt3_5(query, k=3, max_tokens=150):
77
- relevant_chunks = retrieve_chunks(query, k=k)
78
  if not relevant_chunks:
79
  return "Sorry, I couldn't find relevant information."
80
 
@@ -101,10 +105,17 @@ def rag_response_gpt3_5(query, k=3, max_tokens=150):
101
  return response.choices[0].message['content'].strip()
102
 
103
  # Chat history to maintain conversation context
104
- history = []
105
-
106
- # Define Gradio interface function with chat history
107
  def chatbot(query, history):
 
 
 
 
 
 
 
 
 
 
108
  response = rag_response_gpt3_5(query)
109
  history.append((query, response))
110
 
 
 
1
  import gradio as gr
2
  import nltk
3
  from nltk.tokenize import sent_tokenize
 
9
 
10
import os

# Set up OpenAI API key.
# SECURITY: a live `sk-proj-...` key was hard-coded here and is now
# compromised — it must be revoked on the OpenAI dashboard. Never commit
# secrets; read the key from the environment instead.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Download NLTK sentence-tokenizer data used by sent_tokenize
nltk.download('punkt')
 
15
 
16
  # Load the tokenizer and model
17
  tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
18
  model = AutoModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
19
 
20
+ manual_path = "ubuntu_manual.txt"
21
 
22
  # Load the Ubuntu manual from a .txt file
23
  with open(manual_path, "r", encoding="utf-8") as file:
 
63
  index = faiss.IndexFlatL2(dimension)
64
  index.add(chunk_embeddings_np)
65
 
66
# Function to retrieve relevant chunks for a user query and print indices and distances
def retrieve_chunks(query, k=5):
    """Retrieve the k nearest manual chunks for *query* and log the hits.

    Parameters:
        query: free-text user query to embed and search with.
        k: number of nearest neighbours to request from the index.

    Returns:
        (relevant_chunks, indices, distances) — the filtered chunk texts
        plus the raw FAISS index/distance rows for the single query.
    """
    query_embedding = embed_text([query])
    distances, indices = index.search(query_embedding, k=k)

    # FAISS pads missing neighbours with -1; a plain `i < len(...)` guard
    # lets -1 through and manual_chunks[-1] aliases the last chunk.
    # Filter index and distance TOGETHER: the previous code looked the
    # distance up by filtered position (distances[0][i] with i enumerating
    # valid_indices), which misaligns as soon as any index is dropped.
    paired = [
        (idx, dist)
        for idx, dist in zip(indices[0], distances[0])
        if 0 <= idx < len(manual_chunks)
    ]
    relevant_chunks = [manual_chunks[idx] for idx, _ in paired]

    # Print each kept neighbour with its own distance (debug aid)
    for idx, dist in paired:
        print(f"Index: {idx}, Distance: {dist}")

    return relevant_chunks, indices[0], distances[0]
78
 
79
  # Function to perform RAG: Retrieve chunks and generate a response using GPT-3.5
80
  def rag_response_gpt3_5(query, k=3, max_tokens=150):
81
+ relevant_chunks, indices, distances = retrieve_chunks(query, k=k)
82
  if not relevant_chunks:
83
  return "Sorry, I couldn't find relevant information."
84
 
 
105
  return response.choices[0].message['content'].strip()
106
 
107
  # Chat history to maintain conversation context
 
 
 
108
  def chatbot(query, history):
109
+ if history is None:
110
+ history = []
111
+
112
+ # Retrieve relevant chunks along with their indices and distances
113
+ relevant_chunks, indices, distances = retrieve_chunks(query)
114
+
115
+ # Print the indices and distances of the retrieved chunks
116
+ print(f"Retrieved Indices: {indices}")
117
+ print(f"Retrieved Distances: {distances}")
118
+
119
  response = rag_response_gpt3_5(query)
120
  history.append((query, response))
121