Omar Solano committed
Commit 139a897 · 1 Parent(s): a24bc71

add openai_cookbooks data

Files changed (2)
  1. scripts/main.py +25 -11
  2. scripts/setup.py +21 -14
scripts/main.py CHANGED
@@ -10,9 +10,10 @@ from setup import (
     AVAILABLE_SOURCES,
     AVAILABLE_SOURCES_UI,
     CONCURRENCY_COUNT,
-    custom_retriever_llamaindex,
+    custom_retriever_llama_index,
+    custom_retriever_openai_cookbooks,
     custom_retriever_peft,
-    custom_retriever_tf,
+    custom_retriever_transformers,
     custom_retriever_trl,
 )
 
@@ -20,26 +21,31 @@ from setup import (
 def update_query_engine_tools(selected_sources):
     tools = []
     source_mapping = {
-        "HF Transformers": (
-            custom_retriever_tf,
+        "Transformers Docs": (
+            custom_retriever_transformers,
             "Transformers_information",
             """Useful for general questions asking about the artificial intelligence (AI) field. Employ this tool to fetch general information on topics such as language models theory (transformer architectures), tips on prompting, models, quantization, etc.""",
         ),
-        "PEFT": (
+        "PEFT Docs": (
             custom_retriever_peft,
             "PEFT_information",
             """Useful for questions asking about efficient LLM fine-tuning. Employ this tool to fetch information on topics such as LoRA, QLoRA, etc.""",
         ),
-        "TRL": (
+        "TRL Docs": (
             custom_retriever_trl,
             "TRL_information",
             """Useful for questions asking about fine-tuning LLMs with reinforcement learning (RLHF). Includes information about the Supervised Fine-tuning step (SFT), Reward Modeling step (RM), and the Proximal Policy Optimization (PPO) step.""",
         ),
         "LlamaIndex Docs": (
-            custom_retriever_llamaindex,
+            custom_retriever_llama_index,
             "LlamaIndex_information",
             """Useful for questions asking about retrieval augmented generation (RAG) with LLMs and embedding models. It is the documentation of the LlamaIndex framework, includes info about fine-tuning embedding models, building chatbots, and agents with llms, using vector databases, embeddings, information retrieval with cosine similarity or bm25, etc.""",
         ),
+        "OpenAI Cookbooks": (
+            custom_retriever_openai_cookbooks,
+            "openai_cookbooks_info",
+            """Useful for questions asking about accomplishing common tasks with the OpenAI API. Returns example code and guides stored in Jupyter notebooks, including info about ChatGPT GPT actions, OpenAI Assistants API, and How to fine-tune OpenAI's GPT-4o and GPT-4o-mini models with the OpenAI API.""",
+        ),
     }
 
     for source in selected_sources:
@@ -148,9 +154,11 @@ def format_sources(completion) -> str:
         )
         all_documents.append(document)
 
-    documents = "\n".join(all_documents)
-
-    return documents_answer_template.format(documents=documents)
+    if len(all_documents) == 0:
+        return ""
+    else:
+        documents = "\n".join(all_documents)
+        return documents_answer_template.format(documents=documents)
 
 
 def save_completion(completion, history):
@@ -165,7 +173,13 @@ accordion = gr.Accordion(label="Customize Sources (Click to expand)", open=False)
 sources = gr.CheckboxGroup(
     AVAILABLE_SOURCES_UI,
     label="Sources",
-    value=["HF Transformers", "PEFT", "TRL", "LlamaIndex Docs"],
+    value=[
+        "Transformers Docs",
+        "PEFT Docs",
+        "TRL Docs",
+        "LlamaIndex Docs",
+        "OpenAI Cookbooks",
+    ],
     interactive=True,
 )
 model = gr.Dropdown(
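Note: the diff touches only the source_mapping entries; the loop that turns each (retriever, tool_name, description) tuple into an agent tool sits below the changed lines. A minimal sketch of what that step plausibly looks like, assuming LlamaIndex's QueryEngineTool API (the build_tools name and the RetrieverQueryEngine wiring are illustrative guesses, not code from this commit):

# Hypothetical reconstruction -- the real loop body is not shown in the diff.
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata

def build_tools(selected_sources, source_mapping):
    tools = []
    for source in selected_sources:
        if source not in source_mapping:
            continue  # ignore UI sources with no retriever configured
        retriever, name, description = source_mapping[source]
        tools.append(
            QueryEngineTool(
                # wrap each custom retriever in a query engine so the
                # agent can invoke it as a named tool
                query_engine=RetrieverQueryEngine.from_args(retriever),
                metadata=ToolMetadata(name=name, description=description),
            )
        )
    return tools

Keying source_mapping on the UI labels keeps it in sync with the CheckboxGroup values, which is why the default value list is renamed in the same commit.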
scripts/setup.py CHANGED
@@ -63,15 +63,19 @@ def setup_database(db_collection, dict_file_name):
 
 
 # Setup retrievers
-custom_retriever_tf = setup_database(
+custom_retriever_transformers = setup_database(
     "chroma-db-transformers",
-    "document_dict_tf.pkl",
+    "document_dict_transformers.pkl",
 )
 custom_retriever_peft = setup_database("chroma-db-peft", "document_dict_peft.pkl")
 custom_retriever_trl = setup_database("chroma-db-trl", "document_dict_trl.pkl")
-custom_retriever_llamaindex = setup_database(
-    "chroma-db-llama-index",
-    "document_dict_llamaindex.pkl",
+custom_retriever_llama_index = setup_database(
+    "chroma-db-llama_index",
+    "document_dict_llama_index.pkl",
+)
+custom_retriever_openai_cookbooks = setup_database(
+    "chroma-db-openai_cookbooks",
+    "document_dict_openai_cookbooks.pkl",
 )
 
 # Constants
@@ -79,19 +83,21 @@ CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))
 MONGODB_URI = os.getenv("MONGODB_URI")
 
 AVAILABLE_SOURCES_UI = [
-    "HF Transformers",
-    "PEFT",
-    "TRL",
+    "Transformers Docs",
+    "PEFT Docs",
+    "TRL Docs",
     "LlamaIndex Docs",
+    "OpenAI Cookbooks",
     # "Towards AI Blog",
     # "RAG Course",
 ]
 
 AVAILABLE_SOURCES = [
-    "HF_Transformers",
-    "PEFT",
-    "TRL",
-    "LlamaIndex",
+    "transformers",
+    "peft",
+    "trl",
+    "llama_index",
+    "openai_cookbooks",
     # "towards_ai_blog",
     # "rag_course",
 ]
@@ -103,10 +109,11 @@ AVAILABLE_SOURCES = [
 # )
 
 __all__ = [
-    "custom_retriever_tf",
+    "custom_retriever_transformers",
     "custom_retriever_peft",
     "custom_retriever_trl",
-    "custom_retriever_llamaindex",
+    "custom_retriever_llama_index",
+    "custom_retriever_openai_cookbooks",
     "CONCURRENCY_COUNT",
     "MONGODB_URI",
     "AVAILABLE_SOURCES_UI",