Spaces:

towardsai-tutors
/

buster

Running

Omar Solano committed on Dec 4, 2023

Commit

e9199c3

•

1 Parent(s): bb3039a

add langchain docs (#39)

* add langchain docs

* fix empty source completion

* reorder and rename sources

* bugfix update langchain source name

* new dataset name with renamed sources

* remove tmp file

Files changed (3) hide show

app.py +20 -34
cfg.py +1 -1
data/tmp.py +0 -21

app.py CHANGED Viewed

@@ -18,36 +18,38 @@ from gradio.themes.utils import (
 import cfg
 from cfg import setup_buster
-buster = setup_buster(cfg.buster_cfg)
-#  suppress httpx logs they are spammy and uninformative
-logging.getLogger("httpx").setLevel(logging.WARNING)
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.INFO)
 CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))
 AVAILABLE_SOURCES_UI = [
-    "Towards AI",
-    "HuggingFace",
-    "Wikipedia",
-    "Gen AI 360: LangChain",
     "Gen AI 360: LLMs",
-    "Activeloop",
-    "Open AI",
 ]
 AVAILABLE_SOURCES = [
     "towards_ai",
     "hf_transformers",
     "wikipedia",
-    "langchain_course",
-    "llm_course",
-    "activeloop",
     "openai",
 ]
 def log_likes(completion: Completion, like_data: gr.LikeData):
     # make it a str so json-parsable
@@ -92,24 +94,16 @@ def format_sources(matched_documents: pd.DataFrame) -> str:
     matched_documents.similarity_to_answer = (
         matched_documents.similarity_to_answer * 100
     )
-    # matched_documents["repetition"] = matched_documents.groupby("title")[
-    #     "title"
-    # ].transform("size")
-    # drop duplicates, keep highest ranking ones
     matched_documents = matched_documents.sort_values(
         "similarity_to_answer", ascending=False
     ).drop_duplicates("title", keep="first")
-    # Revert back to correct display
     display_source_to_ui = {
         ui: src for ui, src in zip(AVAILABLE_SOURCES, AVAILABLE_SOURCES_UI)
     }
     matched_documents["source"] = matched_documents["source"].replace(
         display_source_to_ui
     )
     documents = "\n".join(
         [
             document_template.format(document=document)
@@ -136,7 +130,7 @@ def user(user_input, history):
 def get_empty_source_completion(user_input):
     return Completion(
-        user_input=user_input,
         answer_text="You have to select at least one source from the dropdown menu.",
         matched_documents=pd.DataFrame(),
         error=False,
@@ -166,15 +160,7 @@ def get_answer(history, sources: Optional[list[str]] = None):
         yield history, completion
-# CSS = """
-# .contain { display: flex; flex-direction: column; }
-# .gradio-container { height: 100vh !important; }
-# #component-0 { height: 100%; }
-# #chatbot { flex-grow: 1; overflow: auto;}
-# """
 theme = gr.themes.Soft()
-# theme.block_background_fill
-# demo = gr.Blocks(theme=theme)
 with gr.Blocks(
     theme=gr.themes.Soft(
         primary_hue="blue",

 import cfg
 from cfg import setup_buster
 CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))
 AVAILABLE_SOURCES_UI = [
     "Gen AI 360: LLMs",
+    "Gen AI 360: LangChain",
+    "Towards AI Blog",
+    "Activeloop Docs",
+    "HF Transformers Docs",
+    "Wikipedia",
+    "OpenAI Docs",
+    "LangChain Docs",
 ]
 AVAILABLE_SOURCES = [
+    "llm_course",
+    "langchain_course",
     "towards_ai",
+    "activeloop",
     "hf_transformers",
     "wikipedia",
     "openai",
+    "langchain_docs",
 ]
+buster = setup_buster(cfg.buster_cfg)
+#  suppress httpx logs they are spammy and uninformative
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
 def log_likes(completion: Completion, like_data: gr.LikeData):
     # make it a str so json-parsable
     matched_documents.similarity_to_answer = (
         matched_documents.similarity_to_answer * 100
     )
     matched_documents = matched_documents.sort_values(
         "similarity_to_answer", ascending=False
     ).drop_duplicates("title", keep="first")
     display_source_to_ui = {
         ui: src for ui, src in zip(AVAILABLE_SOURCES, AVAILABLE_SOURCES_UI)
     }
     matched_documents["source"] = matched_documents["source"].replace(
         display_source_to_ui
     )
     documents = "\n".join(
         [
             document_template.format(document=document)
 def get_empty_source_completion(user_input):
     return Completion(
+        user_inputs=user_input,
         answer_text="You have to select at least one source from the dropdown menu.",
         matched_documents=pd.DataFrame(),
         error=False,
         yield history, completion
 theme = gr.themes.Soft()
 with gr.Blocks(
     theme=gr.themes.Soft(
         primary_hue="blue",

cfg.py CHANGED Viewed

@@ -23,7 +23,7 @@ ACTIVELOOP_TOKEN = os.getenv("ACTIVELOOP_TOKEN")
 if ACTIVELOOP_TOKEN is None:
     logger.warning("No activeloop token found, you will not be able to fetch data.")
-DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "ai-tutor-dataset")
 DEEPLAKE_ORG = os.getenv("DEEPLAKE_ORG", "towards_ai")
 # if you want to use a local dataset, set the env. variable, it overrides all others

 if ACTIVELOOP_TOKEN is None:
     logger.warning("No activeloop token found, you will not be able to fetch data.")
+DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "ai-tutor-dataset_debug")
 DEEPLAKE_ORG = os.getenv("DEEPLAKE_ORG", "towards_ai")
 # if you want to use a local dataset, set the env. variable, it overrides all others

data/tmp.py DELETED Viewed

@@ -1,21 +0,0 @@
-# import pandas as pd
-# # Load the CSV
-# df = pd.read_csv('data/wiki.csv')
-# # Count the number of unique titles in the 'title' column
-# unique_titles_count = df['title']
-# print(len(df))
-# # # Remove the 'ranking' column
-# # df.drop('ranking', axis=1, inplace=True)
-# # # Save the CSV again
-# # df.to_csv('data/wiki.csv', index=False)
-import gradio as gr
-gr.themes.builder()