Ahmad-Moiz committed
Commit 574c00d
1 Parent(s): 0b1e2e1

Update app.py

Files changed (1)
  1. app.py +34 -34
app.py CHANGED
@@ -53,7 +53,7 @@ def load_docs(files: List) -> str:
  @return: string of all docs concatenated
  """

- st.info("`Reading doc ...`")
+ st.info("Reading doc ...")
  all_text = ""
  for file_path in files:
  file_extension = os.path.splitext(file_path.name)[1]
@@ -69,7 +69,7 @@ def load_docs(files: List) -> str:
  file_content = stringio.read()
  all_text += file_content
  else:
- st.warning('Please provide txt or pdf.', icon="⚠️")
+ st.warning('Please provide txt or pdf.', icon="")
  return all_text


@@ -82,7 +82,7 @@ def generate_eval(text: str, num_questions: int, chunk: int):
  @param chunk: chunk size to draw question from in the doc
  @return: eval set as JSON list
  """
- st.info("`Generating eval set ...`")
+ st.info("Generating eval set ...")
  n = len(text)
  starting_indices = [random.randint(0, n - chunk) for _ in range(num_questions)]
  sub_sequences = [text[i:i + chunk] for i in starting_indices]
@@ -93,7 +93,7 @@ def generate_eval(text: str, num_questions: int, chunk: int):
  qa = chain.run(b)
  eval_set.append(qa)
  except:
- st.warning('Error generating question %s.' % str(i + 1), icon="⚠️")
+ st.warning('Error generating question %s.' % str(i + 1), icon="")
  eval_set_full = list(itertools.chain.from_iterable(eval_set))
  return eval_set_full

@@ -108,7 +108,7 @@ def split_texts(text, chunk_size: int, overlap, split_method: str):
  @param split_method:
  @return: list of str splits
  """
- st.info("`Splitting doc ...`")
+ st.info("Splitting doc ...")
  if split_method == "RecursiveTextSplitter":
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
  chunk_overlap=overlap)
@@ -117,7 +117,7 @@ def split_texts(text, chunk_size: int, overlap, split_method: str):
  chunk_size=chunk_size,
  chunk_overlap=overlap)
  else:
- st.warning("`Split method not recognized. Using RecursiveCharacterTextSplitter`", icon="⚠️")
+ st.warning("Split method not recognized. Using RecursiveCharacterTextSplitter", icon="")
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
  chunk_overlap=overlap)

@@ -139,7 +139,7 @@ def make_llm(model_version: str):
  elif model_version == "flan-t5-xl":
  chosen_model = HuggingFaceHub(repo_id="google/flan-t5-xl",model_kwargs={"temperature":0,"max_length":64})
  else:
- st.warning("`Model version not recognized. Using gpt-3.5-turbo`", icon="⚠️")
+ st.warning("Model version not recognized. Using gpt-3.5-turbo", icon="")
  chosen_model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
  return chosen_model

@@ -154,14 +154,14 @@ def make_retriever(splits, retriever_type, embedding_type, num_neighbors, _llm):
  @param _llm: model
  @return: retriever
  """
- st.info("`Making retriever ...`")
+ st.info("Making retriever ...")
  # Set embeddings
  if embedding_type == "OpenAI":
  embedding = OpenAIEmbeddings()
  elif embedding_type == "HuggingFace":
  embedding = HuggingFaceEmbeddings()
  else:
- st.warning("`Embedding type not recognized. Using OpenAI`", icon="⚠️")
+ st.warning("Embedding type not recognized. Using OpenAI", icon="")
  embedding = OpenAIEmbeddings()

  # Select retriever
@@ -169,8 +169,8 @@ def make_retriever(splits, retriever_type, embedding_type, num_neighbors, _llm):
  try:
  vector_store = FAISS.from_texts(splits, embedding)
  except ValueError:
- st.warning("`Error using OpenAI embeddings (disallowed TikToken token in the text). Using HuggingFace.`",
- icon="⚠️")
+ st.warning("Error using OpenAI embeddings (disallowed TikToken token in the text). Using HuggingFace.",
+ icon="")
  vector_store = FAISS.from_texts(splits, HuggingFaceEmbeddings())
  retriever_obj = vector_store.as_retriever(k=num_neighbors)
  elif retriever_type == "SVM":
@@ -185,7 +185,7 @@ def make_retriever(splits, retriever_type, embedding_type, num_neighbors, _llm):
  faiss_index = faiss.IndexFlatL2(d)
  retriever_obj = GPTFaissIndex.from_documents(documents, faiss_index=faiss_index, service_context=context)
  else:
- st.warning("`Retriever type not recognized. Using SVM`", icon="⚠️")
+ st.warning("Retriever type not recognized. Using SVM", icon="")
  retriever_obj = SVMRetriever.from_texts(splits, embedding)
  return retriever_obj

@@ -198,7 +198,7 @@ def make_chain(llm, retriever, retriever_type: str) -> RetrievalQA:
  @param retriever_type: retriever type
  @return: chain (or return retriever for Llama-Index)
  """
- st.info("`Making chain ...`")
+ st.info("Making chain ...")
  if retriever_type == "Llama-Index":
  qa = retriever
  else:
@@ -218,7 +218,7 @@ def grade_model_answer(predicted_dataset: List, predictions: List, grade_answer_
  @return: A list of scores for the distilled answers.
  """
  # Grade the distilled answer
- st.info("`Grading model answer ...`")
+ st.info("Grading model answer ...")
  # Set the grading prompt based on the grade_answer_prompt parameter
  if grade_answer_prompt == "Fast":
  prompt = GRADE_ANSWER_PROMPT_FAST
@@ -255,7 +255,7 @@ def grade_model_retrieval(gt_dataset: List, predictions: List, grade_docs_prompt
  @return: list of scores for the retrieved documents.
  """
  # Grade the docs retrieval
- st.info("`Grading relevance of retrieved docs ...`")
+ st.info("Grading relevance of retrieved docs ...")

  # Set the grading prompt based on the grade_docs_prompt parameter
  prompt = GRADE_DOCS_PROMPT_FAST if grade_docs_prompt == "Fast" else GRADE_DOCS_PROMPT
@@ -291,7 +291,7 @@ def run_evaluation(chain, retriever, eval_set, grade_prompt, retriever_type, num
  - latencies_list: A list of latencies in seconds for each question answered.
  - predictions_list: A list of dictionaries containing the model's predicted answers and relevant documents for each question.
  """
- st.info("`Running evaluation ...`")
+ st.info("Running evaluation ...")
  predictions_list = []
  retrieved_docs = []
  gt_dataset = []
@@ -335,27 +335,27 @@ def run_evaluation(chain, retriever, eval_set, grade_prompt, retriever_type, num
  # Auth
  st.sidebar.image("img/diagnostic.jpg")

- oai_api_key = st.sidebar.text_input("`OpenAI API Key:`", type="password")
- ant_api_key = st.sidebar.text_input("`(Optional) Anthropic API Key:`", type="password")
- hf_api_key = st.sidebar.text_input("`(Optional) HuggingFace API Token:`", type="password")
+ oai_api_key = st.sidebar.text_input("OpenAI API Key:", type="password")
+ ant_api_key = st.sidebar.text_input("(Optional) Anthropic API Key:", type="password")
+ hf_api_key = st.sidebar.text_input("(Optional) HuggingFace API Token:", type="password")

  with st.sidebar.form("user_input"):

- num_eval_questions = st.select_slider("`Number of eval questions`",
+ num_eval_questions = st.select_slider("Number of eval questions",
  options=[1, 5, 10, 15, 20], value=5)

- chunk_chars = st.select_slider("`Choose chunk size for splitting`",
+ chunk_chars = st.select_slider("Choose chunk size for splitting",
  options=[500, 750, 1000, 1500, 2000], value=1000)

- overlap = st.select_slider("`Choose overlap for splitting`",
+ overlap = st.select_slider("Choose overlap for splitting",
  options=[0, 50, 100, 150, 200], value=100)

- split_method = st.radio("`Split method`",
+ split_method = st.radio("Split method",
  ("RecursiveTextSplitter",
  "CharacterTextSplitter"),
  index=0)

- model = st.radio("`Choose model`",
+ model = st.radio("Choose model",
  ("gpt-3.5-turbo",
  "gpt-4",
  "anthropic"),
@@ -363,22 +363,22 @@ with st.sidebar.form("user_input"):
  #"flan-t5-xl"),
  index=0)

- retriever_type = st.radio("`Choose retriever`",
+ retriever_type = st.radio("Choose retriever",
  ("TF-IDF",
  "SVM",
  "Llama-Index",
  "similarity-search"),
  index=3)

- num_neighbors = st.select_slider("`Choose # chunks to retrieve`",
+ num_neighbors = st.select_slider("Choose # chunks to retrieve",
  options=[3, 4, 5, 6, 7, 8])

- embeddings = st.radio("`Choose embeddings`",
+ embeddings = st.radio("Choose embeddings",
  ("HuggingFace",
  "OpenAI"),
  index=1)

- grade_prompt = st.radio("`Grading style prompt`",
+ grade_prompt = st.radio("Grading style prompt",
  ("Fast",
  "Descriptive",
  "Descriptive w/ bias check",
@@ -387,21 +387,21 @@ with st.sidebar.form("user_input"):

  submitted = st.form_submit_button("Submit evaluation")

- st.sidebar.write("`By:` [@RLanceMartin](https://twitter.com/RLanceMartin)")
+ st.sidebar.write("By: [Sentient](https://twitter.com/sentient)")

  # App
- st.header("`Auto-evaluator`")
+ st.header("Auto-evaluator")
  st.info(
  "`I am an evaluation tool for question-answering built on LangChain. Given documents, I will auto-generate a question-answer eval "
  "set and evaluate using the selected chain settings. Experiments with different configurations are logged. "
  "Optionally, provide your own eval set (as a JSON, see docs/karpathy-pod-eval.json for an example). If you don't have acess to GPT-4 or Anthropic, you can use our free hosted app here: https://autoevaluator.langchain.com/`")

  with st.form(key='file_inputs'):
- uploaded_file = st.file_uploader("`Please upload a file to evaluate (.txt or .pdf):` ",
+ uploaded_file = st.file_uploader("Please upload a file to evaluate (.txt or .pdf): ",
  type=['pdf', 'txt'],
  accept_multiple_files=True)

- uploaded_eval_set = st.file_uploader("`[Optional] Please upload eval set (.json):` ",
+ uploaded_eval_set = st.file_uploader("[Optional] Please upload eval set (.json): ",
  type=['json'],
  accept_multiple_files=False)

@@ -445,7 +445,7 @@ if uploaded_file and oai_api_key:
  percentage_answer = (correct_answer_count / len(graded_answers)) * 100
  percentage_docs = (correct_docs_count / len(graded_retrieval)) * 100

- st.subheader("`Run Results`")
+ st.subheader("Run Results")
  st.info(
  "`I will grade the chain based on: 1/ the relevance of the retrived documents relative to the question and 2/ "
  "the summarized answer relative to the ground truth answer. You can see (and change) to prompts used for "
@@ -453,7 +453,7 @@ if uploaded_file and oai_api_key:
  st.dataframe(data=d, use_container_width=True)

  # Accumulate results
- st.subheader("`Aggregate Results`")
+ st.subheader("Aggregate Results")
  st.info(
  "`Retrieval and answer scores are percentage of retrived documents deemed relevant by the LLM grader ("
  "relative to the question) and percentage of summarized answers deemed relevant (relative to ground truth "