Omar Solano
committed on
Commit
·
139a897
1
Parent(s):
a24bc71
add openai_cookbooks data
Browse files
- scripts/main.py +25 -11
- scripts/setup.py +21 -14
scripts/main.py
CHANGED
@@ -10,9 +10,10 @@ from setup import (
|
|
10 |
AVAILABLE_SOURCES,
|
11 |
AVAILABLE_SOURCES_UI,
|
12 |
CONCURRENCY_COUNT,
|
13 |
-
|
|
|
14 |
custom_retriever_peft,
|
15 |
-
|
16 |
custom_retriever_trl,
|
17 |
)
|
18 |
|
@@ -20,26 +21,31 @@ from setup import (
|
|
20 |
def update_query_engine_tools(selected_sources):
|
21 |
tools = []
|
22 |
source_mapping = {
|
23 |
-
"
|
24 |
-
|
25 |
"Transformers_information",
|
26 |
"""Useful for general questions asking about the artificial intelligence (AI) field. Employ this tool to fetch general information on topics such as language models theory (transformer architectures), tips on prompting, models, quantization, etc.""",
|
27 |
),
|
28 |
-
"PEFT": (
|
29 |
custom_retriever_peft,
|
30 |
"PEFT_information",
|
31 |
"""Useful for questions asking about efficient LLM fine-tuning. Employ this tool to fetch information on topics such as LoRA, QLoRA, etc.""",
|
32 |
),
|
33 |
-
"TRL": (
|
34 |
custom_retriever_trl,
|
35 |
"TRL_information",
|
36 |
"""Useful for questions asking about fine-tuning LLMs with reinforcement learning (RLHF). Includes information about the Supervised Fine-tuning step (SFT), Reward Modeling step (RM), and the Proximal Policy Optimization (PPO) step.""",
|
37 |
),
|
38 |
"LlamaIndex Docs": (
|
39 |
-
|
40 |
"LlamaIndex_information",
|
41 |
"""Useful for questions asking about retrieval augmented generation (RAG) with LLMs and embedding models. It is the documentation of the LlamaIndex framework, includes info about fine-tuning embedding models, building chatbots, and agents with llms, using vector databases, embeddings, information retrieval with cosine similarity or bm25, etc.""",
|
42 |
),
|
|
|
|
|
|
|
|
|
|
|
43 |
}
|
44 |
|
45 |
for source in selected_sources:
|
@@ -148,9 +154,11 @@ def format_sources(completion) -> str:
|
|
148 |
)
|
149 |
all_documents.append(document)
|
150 |
|
151 |
-
|
152 |
-
|
153 |
-
|
|
|
|
|
154 |
|
155 |
|
156 |
def save_completion(completion, history):
|
@@ -165,7 +173,13 @@ accordion = gr.Accordion(label="Customize Sources (Click to expand)", open=False
|
|
165 |
sources = gr.CheckboxGroup(
|
166 |
AVAILABLE_SOURCES_UI,
|
167 |
label="Sources",
|
168 |
-
value=[
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
interactive=True,
|
170 |
)
|
171 |
model = gr.Dropdown(
|
|
|
10 |
AVAILABLE_SOURCES,
|
11 |
AVAILABLE_SOURCES_UI,
|
12 |
CONCURRENCY_COUNT,
|
13 |
+
custom_retriever_llama_index,
|
14 |
+
custom_retriever_openai_cookbooks,
|
15 |
custom_retriever_peft,
|
16 |
+
custom_retriever_transformers,
|
17 |
custom_retriever_trl,
|
18 |
)
|
19 |
|
|
|
21 |
def update_query_engine_tools(selected_sources):
|
22 |
tools = []
|
23 |
source_mapping = {
|
24 |
+
"Transformers Docs": (
|
25 |
+
custom_retriever_transformers,
|
26 |
"Transformers_information",
|
27 |
"""Useful for general questions asking about the artificial intelligence (AI) field. Employ this tool to fetch general information on topics such as language models theory (transformer architectures), tips on prompting, models, quantization, etc.""",
|
28 |
),
|
29 |
+
"PEFT Docs": (
|
30 |
custom_retriever_peft,
|
31 |
"PEFT_information",
|
32 |
"""Useful for questions asking about efficient LLM fine-tuning. Employ this tool to fetch information on topics such as LoRA, QLoRA, etc.""",
|
33 |
),
|
34 |
+
"TRL Docs": (
|
35 |
custom_retriever_trl,
|
36 |
"TRL_information",
|
37 |
"""Useful for questions asking about fine-tuning LLMs with reinforcement learning (RLHF). Includes information about the Supervised Fine-tuning step (SFT), Reward Modeling step (RM), and the Proximal Policy Optimization (PPO) step.""",
|
38 |
),
|
39 |
"LlamaIndex Docs": (
|
40 |
+
custom_retriever_llama_index,
|
41 |
"LlamaIndex_information",
|
42 |
"""Useful for questions asking about retrieval augmented generation (RAG) with LLMs and embedding models. It is the documentation of the LlamaIndex framework, includes info about fine-tuning embedding models, building chatbots, and agents with llms, using vector databases, embeddings, information retrieval with cosine similarity or bm25, etc.""",
|
43 |
),
|
44 |
+
"OpenAI Cookbooks": (
|
45 |
+
custom_retriever_openai_cookbooks,
|
46 |
+
"openai_cookbooks_info",
|
47 |
+
"""Useful for questions asking about accomplishing common tasks with the OpenAI API. Returns example code and guides stored in Jupyter notebooks, including info about ChatGPT GPT actions, OpenAI Assistants API, and How to fine-tune OpenAI's GPT-4o and GPT-4o-mini models with the OpenAI API.""",
|
48 |
+
),
|
49 |
}
|
50 |
|
51 |
for source in selected_sources:
|
|
|
154 |
)
|
155 |
all_documents.append(document)
|
156 |
|
157 |
+
if len(all_documents) == 0:
|
158 |
+
return ""
|
159 |
+
else:
|
160 |
+
documents = "\n".join(all_documents)
|
161 |
+
return documents_answer_template.format(documents=documents)
|
162 |
|
163 |
|
164 |
def save_completion(completion, history):
|
|
|
173 |
sources = gr.CheckboxGroup(
|
174 |
AVAILABLE_SOURCES_UI,
|
175 |
label="Sources",
|
176 |
+
value=[
|
177 |
+
"Transformers Docs",
|
178 |
+
"PEFT Docs",
|
179 |
+
"TRL Docs",
|
180 |
+
"LlamaIndex Docs",
|
181 |
+
"OpenAI Cookbooks",
|
182 |
+
],
|
183 |
interactive=True,
|
184 |
)
|
185 |
model = gr.Dropdown(
|
scripts/setup.py
CHANGED
@@ -63,15 +63,19 @@ def setup_database(db_collection, dict_file_name):
|
|
63 |
|
64 |
|
65 |
# Setup retrievers
|
66 |
-
|
67 |
"chroma-db-transformers",
|
68 |
-
"
|
69 |
)
|
70 |
custom_retriever_peft = setup_database("chroma-db-peft", "document_dict_peft.pkl")
|
71 |
custom_retriever_trl = setup_database("chroma-db-trl", "document_dict_trl.pkl")
|
72 |
-
|
73 |
-
"chroma-db-
|
74 |
-
"
|
|
|
|
|
|
|
|
|
75 |
)
|
76 |
|
77 |
# Constants
|
@@ -79,19 +83,21 @@ CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))
|
|
79 |
MONGODB_URI = os.getenv("MONGODB_URI")
|
80 |
|
81 |
AVAILABLE_SOURCES_UI = [
|
82 |
-
"
|
83 |
-
"PEFT",
|
84 |
-
"TRL",
|
85 |
"LlamaIndex Docs",
|
|
|
86 |
# "Towards AI Blog",
|
87 |
# "RAG Course",
|
88 |
]
|
89 |
|
90 |
AVAILABLE_SOURCES = [
|
91 |
-
"
|
92 |
-
"
|
93 |
-
"
|
94 |
-
"
|
|
|
95 |
# "towards_ai_blog",
|
96 |
# "rag_course",
|
97 |
]
|
@@ -103,10 +109,11 @@ AVAILABLE_SOURCES = [
|
|
103 |
# )
|
104 |
|
105 |
__all__ = [
|
106 |
-
"
|
107 |
"custom_retriever_peft",
|
108 |
"custom_retriever_trl",
|
109 |
-
"
|
|
|
110 |
"CONCURRENCY_COUNT",
|
111 |
"MONGODB_URI",
|
112 |
"AVAILABLE_SOURCES_UI",
|
|
|
63 |
|
64 |
|
65 |
# Setup retrievers
|
66 |
+
custom_retriever_transformers = setup_database(
|
67 |
"chroma-db-transformers",
|
68 |
+
"document_dict_transformers.pkl",
|
69 |
)
|
70 |
custom_retriever_peft = setup_database("chroma-db-peft", "document_dict_peft.pkl")
|
71 |
custom_retriever_trl = setup_database("chroma-db-trl", "document_dict_trl.pkl")
|
72 |
+
custom_retriever_llama_index = setup_database(
|
73 |
+
"chroma-db-llama_index",
|
74 |
+
"document_dict_llama_index.pkl",
|
75 |
+
)
|
76 |
+
custom_retriever_openai_cookbooks = setup_database(
|
77 |
+
"chroma-db-openai_cookbooks",
|
78 |
+
"document_dict_openai_cookbooks.pkl",
|
79 |
)
|
80 |
|
81 |
# Constants
|
|
|
83 |
MONGODB_URI = os.getenv("MONGODB_URI")
|
84 |
|
85 |
AVAILABLE_SOURCES_UI = [
|
86 |
+
"Transformers Docs",
|
87 |
+
"PEFT Docs",
|
88 |
+
"TRL Docs",
|
89 |
"LlamaIndex Docs",
|
90 |
+
"OpenAI Cookbooks",
|
91 |
# "Towards AI Blog",
|
92 |
# "RAG Course",
|
93 |
]
|
94 |
|
95 |
AVAILABLE_SOURCES = [
|
96 |
+
"transformers",
|
97 |
+
"peft",
|
98 |
+
"trl",
|
99 |
+
"llama_index",
|
100 |
+
"openai_cookbooks",
|
101 |
# "towards_ai_blog",
|
102 |
# "rag_course",
|
103 |
]
|
|
|
109 |
# )
|
110 |
|
111 |
__all__ = [
|
112 |
+
"custom_retriever_transformers",
|
113 |
"custom_retriever_peft",
|
114 |
"custom_retriever_trl",
|
115 |
+
"custom_retriever_llama_index",
|
116 |
+
"custom_retriever_openai_cookbooks",
|
117 |
"CONCURRENCY_COUNT",
|
118 |
"MONGODB_URI",
|
119 |
"AVAILABLE_SOURCES_UI",
|