Rajat.bans committed on
Commit
439db48
1 Parent(s): 4f7de21

Updated chat history and corrected requirements file

Browse files
Files changed (2) hide show
  1. rag.py +63 -32
  2. requirements.txt +4 -3
rag.py CHANGED
@@ -1,6 +1,8 @@
1
  from dotenv import load_dotenv
2
  from langchain.text_splitter import RecursiveCharacterTextSplitter
3
  from langchain_community.vectorstores import FAISS
 
 
4
  # from langchain_openai import OpenAIEmbeddings
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  import os
@@ -27,11 +29,12 @@ bestReformulationPrompt = "Given a chat history and the latest user question, wh
27
  bestSystemPrompt = "You're an assistant for question-answering tasks. Under absolutely no circumstances should you use external knowledge or go beyond the provided preknowledge. Your approach must be systematic and meticulous. First, identify CLUES such as keywords, phrases, contextual information, semantic relations, tones, and references that aid in determining the context of the input. Second, construct a concise diagnostic REASONING process (limiting to 130 words) based on premises supporting the INPUT relevance within the provided context. Third, utilizing the identified clues, reasoning, and input, furnish the pertinent answer for the question. Remember, you are required to use ONLY the provided context to answer the questions. If the question does not align with the preknowledge or if the preknowledge is absent, state that you don't know the answer. External knowledge is strictly prohibited. Failure to adhere will result in incorrect answers. The preknowledge is as follows:"
28
 
29
  # embeddings_oa = OpenAIEmbeddings(model=embedding_model_oa)
30
- embeddings_hf = HuggingFaceEmbeddings(model_name = embedding_model_hf, show_progress = True)
 
31
 
32
  def setupDb(data_path):
33
  df = pd.read_csv(data_path, sep="\t")
34
- relevant_content = df["url"].values
35
  text_splitter = RecursiveCharacterTextSplitter(
36
  chunk_size=CHUNK_SIZE,
37
  chunk_overlap=CHUNK_OVERLAP,
@@ -57,11 +60,9 @@ def setupDb(data_path):
57
  )
58
  return db, relevant_content
59
 
 
60
  def reformulate_question(chat_history, latest_question, reformulationPrompt):
61
- system_message = {
62
- "role": "system",
63
- "content": reformulationPrompt
64
- }
65
 
66
  formatted_history = []
67
  for i, chat in enumerate(chat_history):
@@ -73,62 +74,92 @@ def reformulate_question(chat_history, latest_question, reformulationPrompt):
73
  response = client.chat.completions.create(
74
  model="gpt-3.5-turbo",
75
  messages=[system_message] + formatted_history,
76
- temperature=0
77
  )
78
 
79
  reformulated_question = response.choices[0].message.content
80
  return reformulated_question
81
 
 
82
  def getQuestionAnswerOnTheBasisOfContext(question, context, systemPrompt):
83
- system_message = {
84
- "role": "system",
85
- "content": systemPrompt + context
86
- }
87
 
88
  response = client.chat.completions.create(
89
  model=qa_model_name,
90
  messages=[system_message] + [{"role": "user", "content": question}],
91
- temperature=0
92
  )
93
  answer = response.choices[0].message.content
94
  return answer
95
 
96
 
97
- def chatWithRag(reformulationPrompt, QAPrompt, question):
98
- global curr_question_no, chat_history
99
  curr_question_prompt = bestSystemPrompt
100
  if QAPrompt != None or len(QAPrompt):
101
  curr_question_prompt = QAPrompt
102
 
103
  # reformulated_query = reformulate_question(chat_history, question, reformulationPrompt)
104
  reformulated_query = question
105
- retreived_documents = [doc for doc in db.similarity_search_with_score(reformulated_query) if doc[1] < 1.3]
106
- answer = getQuestionAnswerOnTheBasisOfContext(reformulated_query, '. '.join([doc[0].page_content for doc in retreived_documents]), curr_question_prompt)
 
 
 
 
 
 
 
 
107
  chat_history.append((question, answer))
108
- curr_question_no += 1
109
- docs_info = "\n\n".join([
110
- f"Title: {doc[0].metadata['title']}\nUrl: {doc[0].metadata['url']}\nContent: {doc[0].page_content}\nValue: {doc[1]}" for doc in retreived_documents
111
- ])
112
- full_response = f"Answer: {answer}\n\nReformulated question: {reformulated_query}\nRetrieved Documents:\n{docs_info}"
 
 
 
113
  # print(question, full_response)
114
- return full_response
115
 
116
- db, relevant_content = setupDb(data_file_path)
117
- chat_history = []
118
- curr_question_no = 1
119
 
 
120
  with gr.Blocks() as demo:
121
  gr.Markdown("# RAG on webmd")
122
  with gr.Row():
123
- reformulationPrompt = gr.Textbox(bestReformulationPrompt, lines=1, placeholder="Enter the system prompt for reformulation of query", label="Reformulation System prompt")
124
- QAPrompt = gr.Textbox(bestSystemPrompt, lines=1, placeholder="Enter the system prompt for QA.", label="QA System prompt")
125
- question = gr.Textbox(lines=1, placeholder="Enter the question asked", label="Question")
 
 
 
 
 
 
 
 
 
 
 
 
126
  output = gr.Textbox(label="Output")
127
  submit_btn = gr.Button("Submit")
128
- submit_btn.click(chatWithRag, inputs=[reformulationPrompt, QAPrompt, question], outputs=output)
129
- question.submit(chatWithRag, [reformulationPrompt, QAPrompt, question], [output])
 
 
 
 
 
 
 
 
 
 
 
130
  with gr.Accordion("Urls", open=False):
131
- gr.Markdown(', '.join(relevant_content))
132
 
133
  gr.close_all()
134
- demo.launch()
 
1
  from dotenv import load_dotenv
2
  from langchain.text_splitter import RecursiveCharacterTextSplitter
3
  from langchain_community.vectorstores import FAISS
4
+ import random
5
+
6
  # from langchain_openai import OpenAIEmbeddings
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  import os
 
29
  bestSystemPrompt = "You're an assistant for question-answering tasks. Under absolutely no circumstances should you use external knowledge or go beyond the provided preknowledge. Your approach must be systematic and meticulous. First, identify CLUES such as keywords, phrases, contextual information, semantic relations, tones, and references that aid in determining the context of the input. Second, construct a concise diagnostic REASONING process (limiting to 130 words) based on premises supporting the INPUT relevance within the provided context. Third, utilizing the identified clues, reasoning, and input, furnish the pertinent answer for the question. Remember, you are required to use ONLY the provided context to answer the questions. If the question does not align with the preknowledge or if the preknowledge is absent, state that you don't know the answer. External knowledge is strictly prohibited. Failure to adhere will result in incorrect answers. The preknowledge is as follows:"
30
 
31
  # embeddings_oa = OpenAIEmbeddings(model=embedding_model_oa)
32
+ embeddings_hf = HuggingFaceEmbeddings(model_name=embedding_model_hf, show_progress=True)
33
+
34
 
35
  def setupDb(data_path):
36
  df = pd.read_csv(data_path, sep="\t")
37
+ relevant_content = list(df["url"].values)
38
  text_splitter = RecursiveCharacterTextSplitter(
39
  chunk_size=CHUNK_SIZE,
40
  chunk_overlap=CHUNK_OVERLAP,
 
60
  )
61
  return db, relevant_content
62
 
63
+
64
  def reformulate_question(chat_history, latest_question, reformulationPrompt):
65
+ system_message = {"role": "system", "content": reformulationPrompt}
 
 
 
66
 
67
  formatted_history = []
68
  for i, chat in enumerate(chat_history):
 
74
  response = client.chat.completions.create(
75
  model="gpt-3.5-turbo",
76
  messages=[system_message] + formatted_history,
77
+ temperature=0,
78
  )
79
 
80
  reformulated_question = response.choices[0].message.content
81
  return reformulated_question
82
 
83
+
84
  def getQuestionAnswerOnTheBasisOfContext(question, context, systemPrompt):
85
+ system_message = {"role": "system", "content": systemPrompt + context}
 
 
 
86
 
87
  response = client.chat.completions.create(
88
  model=qa_model_name,
89
  messages=[system_message] + [{"role": "user", "content": question}],
90
+ temperature=0,
91
  )
92
  answer = response.choices[0].message.content
93
  return answer
94
 
95
 
96
+ def chatWithRag(reformulationPrompt, QAPrompt, question, chat_history):
97
+ global curr_question_no
98
  curr_question_prompt = bestSystemPrompt
99
  if QAPrompt != None or len(QAPrompt):
100
  curr_question_prompt = QAPrompt
101
 
102
  # reformulated_query = reformulate_question(chat_history, question, reformulationPrompt)
103
  reformulated_query = question
104
+ retreived_documents = [
105
+ doc
106
+ for doc in db.similarity_search_with_score(reformulated_query)
107
+ if doc[1] < 1.3
108
+ ]
109
+ answer = getQuestionAnswerOnTheBasisOfContext(
110
+ reformulated_query,
111
+ ". ".join([doc[0].page_content for doc in retreived_documents]),
112
+ curr_question_prompt,
113
+ )
114
  chat_history.append((question, answer))
115
+ docs_info = "\n\n".join(
116
+ [
117
+ f"Title: {doc[0].metadata['title']}\nUrl: {doc[0].metadata['url']}\nContent: {doc[0].page_content}\nValue: {doc[1]}"
118
+ for doc in retreived_documents
119
+ ]
120
+ )
121
+ history_info = "\n\n".join([f"Q: {q}\nA: {a}" for q, a in chat_history])
122
+ full_response = f"Answer: {answer}\n\nReformulated question: {reformulated_query}\nRetrieved Documents:\n{docs_info}\n\nChat History:\n{history_info}"
123
  # print(question, full_response)
124
+ return full_response, chat_history
125
 
 
 
 
126
 
127
+ db, relevant_content = setupDb(data_file_path)
128
  with gr.Blocks() as demo:
129
  gr.Markdown("# RAG on webmd")
130
  with gr.Row():
131
+ reformulationPrompt = gr.Textbox(
132
+ bestReformulationPrompt,
133
+ lines=1,
134
+ placeholder="Enter the system prompt for reformulation of query",
135
+ label="Reformulation System prompt",
136
+ )
137
+ QAPrompt = gr.Textbox(
138
+ bestSystemPrompt,
139
+ lines=1,
140
+ placeholder="Enter the system prompt for QA.",
141
+ label="QA System prompt",
142
+ )
143
+ question = gr.Textbox(
144
+ lines=1, placeholder="Enter the question asked", label="Question"
145
+ )
146
  output = gr.Textbox(label="Output")
147
  submit_btn = gr.Button("Submit")
148
+ selected_urls = random.sample(relevant_content, 100)
149
+
150
+ chat_history = gr.State([])
151
+ submit_btn.click(
152
+ chatWithRag,
153
+ inputs=[reformulationPrompt, QAPrompt, question, chat_history],
154
+ outputs=[output, chat_history],
155
+ )
156
+ question.submit(
157
+ chatWithRag,
158
+ inputs=[reformulationPrompt, QAPrompt, question, chat_history],
159
+ outputs=[output, chat_history],
160
+ )
161
  with gr.Accordion("Urls", open=False):
162
+ gr.Markdown(", ".join(selected_urls))
163
 
164
  gr.close_all()
165
+ demo.launch()
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  gradio
2
  python-dotenv
3
  langchain
4
- langchain_community
5
- langchain_openai
6
- faiss-cpu
 
 
1
  gradio
2
  python-dotenv
3
  langchain
4
+ langchain-community
5
+ langchain-openai
6
+ faiss-cpu
7
+ sentence-transformers