Spaces:

towardsai-tutors
/

ai-tutor-chatbot

Running

App Files Community

Omar Solano committed on Jul 28, 2024

Commit

139a897

1 Parent(s): a24bc71

add openai_cookbooks data

Browse files

Files changed (2) hide show

scripts/main.py +25 -11
scripts/setup.py +21 -14

scripts/main.py CHANGED Viewed

@@ -10,9 +10,10 @@ from setup import (
     AVAILABLE_SOURCES,
     AVAILABLE_SOURCES_UI,
     CONCURRENCY_COUNT,
-    custom_retriever_llamaindex,
     custom_retriever_peft,
-    custom_retriever_tf,
     custom_retriever_trl,
 )
@@ -20,26 +21,31 @@ from setup import (
 def update_query_engine_tools(selected_sources):
     tools = []
     source_mapping = {
-        "HF Transformers": (
-            custom_retriever_tf,
             "Transformers_information",
             """Useful for general questions asking about the artificial intelligence (AI) field. Employ this tool to fetch general information on topics such as language models theory (transformer architectures), tips on prompting, models, quantization, etc.""",
         ),
-        "PEFT": (
             custom_retriever_peft,
             "PEFT_information",
             """Useful for questions asking about efficient LLM fine-tuning. Employ this tool to fetch information on topics such as LoRA, QLoRA, etc.""",
         ),
-        "TRL": (
             custom_retriever_trl,
             "TRL_information",
             """Useful for questions asking about fine-tuning LLMs with reinforcement learning (RLHF). Includes information about the Supervised Fine-tuning step (SFT), Reward Modeling step (RM), and the Proximal Policy Optimization (PPO) step.""",
         ),
         "LlamaIndex Docs": (
-            custom_retriever_llamaindex,
             "LlamaIndex_information",
             """Useful for questions asking about retrieval augmented generation (RAG) with LLMs and embedding models. It is the documentation of the LlamaIndex framework, includes info about fine-tuning embedding models, building chatbots, and agents with llms, using vector databases, embeddings, information retrieval with cosine similarity or bm25, etc.""",
         ),
     }
     for source in selected_sources:
@@ -148,9 +154,11 @@ def format_sources(completion) -> str:
             )
             all_documents.append(document)
-    documents = "\n".join(all_documents)
-    return documents_answer_template.format(documents=documents)
 def save_completion(completion, history):
@@ -165,7 +173,13 @@ accordion = gr.Accordion(label="Customize Sources (Click to expand)", open=False
 sources = gr.CheckboxGroup(
     AVAILABLE_SOURCES_UI,
     label="Sources",
-    value=["HF Transformers", "PEFT", "TRL", "LlamaIndex Docs"],
     interactive=True,
 )
 model = gr.Dropdown(

     AVAILABLE_SOURCES,
     AVAILABLE_SOURCES_UI,
     CONCURRENCY_COUNT,
+    custom_retriever_llama_index,
+    custom_retriever_openai_cookbooks,
     custom_retriever_peft,
+    custom_retriever_transformers,
     custom_retriever_trl,
 )
 def update_query_engine_tools(selected_sources):
     tools = []
     source_mapping = {
+        "Transformers Docs": (
+            custom_retriever_transformers,
             "Transformers_information",
             """Useful for general questions asking about the artificial intelligence (AI) field. Employ this tool to fetch general information on topics such as language models theory (transformer architectures), tips on prompting, models, quantization, etc.""",
         ),
+        "PEFT Docs": (
             custom_retriever_peft,
             "PEFT_information",
             """Useful for questions asking about efficient LLM fine-tuning. Employ this tool to fetch information on topics such as LoRA, QLoRA, etc.""",
         ),
+        "TRL Docs": (
             custom_retriever_trl,
             "TRL_information",
             """Useful for questions asking about fine-tuning LLMs with reinforcement learning (RLHF). Includes information about the Supervised Fine-tuning step (SFT), Reward Modeling step (RM), and the Proximal Policy Optimization (PPO) step.""",
         ),
         "LlamaIndex Docs": (
+            custom_retriever_llama_index,
             "LlamaIndex_information",
             """Useful for questions asking about retrieval augmented generation (RAG) with LLMs and embedding models. It is the documentation of the LlamaIndex framework, includes info about fine-tuning embedding models, building chatbots, and agents with llms, using vector databases, embeddings, information retrieval with cosine similarity or bm25, etc.""",
         ),
+        "OpenAI Cookbooks": (
+            custom_retriever_openai_cookbooks,
+            "openai_cookbooks_info",
+            """Useful for questions asking about accomplishing common tasks with the OpenAI API. Returns example code and guides stored in Jupyter notebooks, including info about ChatGPT GPT actions, OpenAI Assistants API,  and How to fine-tune OpenAI's GPT-4o and GPT-4o-mini models with the OpenAI API.""",
+        ),
     }
     for source in selected_sources:
             )
             all_documents.append(document)
+    if len(all_documents) == 0:
+        return ""
+    else:
+        documents = "\n".join(all_documents)
+        return documents_answer_template.format(documents=documents)
 def save_completion(completion, history):
 sources = gr.CheckboxGroup(
     AVAILABLE_SOURCES_UI,
     label="Sources",
+    value=[
+        "Transformers Docs",
+        "PEFT Docs",
+        "TRL Docs",
+        "LlamaIndex Docs",
+        "OpenAI Cookbooks",
+    ],
     interactive=True,
 )
 model = gr.Dropdown(

scripts/setup.py CHANGED Viewed

@@ -63,15 +63,19 @@ def setup_database(db_collection, dict_file_name):
 # Setup retrievers
-custom_retriever_tf = setup_database(
     "chroma-db-transformers",
-    "document_dict_tf.pkl",
 )
 custom_retriever_peft = setup_database("chroma-db-peft", "document_dict_peft.pkl")
 custom_retriever_trl = setup_database("chroma-db-trl", "document_dict_trl.pkl")
-custom_retriever_llamaindex = setup_database(
-    "chroma-db-llama-index",
-    "document_dict_llamaindex.pkl",
 )
 # Constants
@@ -79,19 +83,21 @@ CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))
 MONGODB_URI = os.getenv("MONGODB_URI")
 AVAILABLE_SOURCES_UI = [
-    "HF Transformers",
-    "PEFT",
-    "TRL",
     "LlamaIndex Docs",
     # "Towards AI Blog",
     # "RAG Course",
 ]
 AVAILABLE_SOURCES = [
-    "HF_Transformers",
-    "PEFT",
-    "TRL",
-    "LlamaIndex",
     # "towards_ai_blog",
     # "rag_course",
 ]
@@ -103,10 +109,11 @@ AVAILABLE_SOURCES = [
 # )
 __all__ = [
-    "custom_retriever_tf",
     "custom_retriever_peft",
     "custom_retriever_trl",
-    "custom_retriever_llamaindex",
     "CONCURRENCY_COUNT",
     "MONGODB_URI",
     "AVAILABLE_SOURCES_UI",

 # Setup retrievers
+custom_retriever_transformers = setup_database(
     "chroma-db-transformers",
+    "document_dict_transformers.pkl",
 )
 custom_retriever_peft = setup_database("chroma-db-peft", "document_dict_peft.pkl")
 custom_retriever_trl = setup_database("chroma-db-trl", "document_dict_trl.pkl")
+custom_retriever_llama_index = setup_database(
+    "chroma-db-llama_index",
+    "document_dict_llama_index.pkl",
+)
+custom_retriever_openai_cookbooks = setup_database(
+    "chroma-db-openai_cookbooks",
+    "document_dict_openai_cookbooks.pkl",
 )
 # Constants
 MONGODB_URI = os.getenv("MONGODB_URI")
 AVAILABLE_SOURCES_UI = [
+    "Transformers Docs",
+    "PEFT Docs",
+    "TRL Docs",
     "LlamaIndex Docs",
+    "OpenAI Cookbooks",
     # "Towards AI Blog",
     # "RAG Course",
 ]
 AVAILABLE_SOURCES = [
+    "transformers",
+    "peft",
+    "trl",
+    "llama_index",
+    "openai_cookbooks",
     # "towards_ai_blog",
     # "rag_course",
 ]
 # )
 __all__ = [
+    "custom_retriever_transformers",
     "custom_retriever_peft",
     "custom_retriever_trl",
+    "custom_retriever_llama_index",
+    "custom_retriever_openai_cookbooks",
     "CONCURRENCY_COUNT",
     "MONGODB_URI",
     "AVAILABLE_SOURCES_UI",