Spaces:
Running
Running
Omar Solano
committed on
Commit
•
e9199c3
1
Parent(s):
bb3039a
add langchain docs (#39)
Browse files
* add langchain docs
* fix empty source completion
* reorder and rename sources
* bugfix update langchain source name
* new dataset name with renamed sources
* remove tmp file
- app.py +20 -34
- cfg.py +1 -1
- data/tmp.py +0 -21
app.py
CHANGED
@@ -18,36 +18,38 @@ from gradio.themes.utils import (
|
|
18 |
import cfg
|
19 |
from cfg import setup_buster
|
20 |
|
21 |
-
buster = setup_buster(cfg.buster_cfg)
|
22 |
-
|
23 |
-
# suppress httpx logs they are spammy and uninformative
|
24 |
-
logging.getLogger("httpx").setLevel(logging.WARNING)
|
25 |
-
|
26 |
-
logger = logging.getLogger(__name__)
|
27 |
-
logging.basicConfig(level=logging.INFO)
|
28 |
-
|
29 |
CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))
|
30 |
|
31 |
AVAILABLE_SOURCES_UI = [
|
32 |
-
"Towards AI",
|
33 |
-
"HuggingFace",
|
34 |
-
"Wikipedia",
|
35 |
-
"Gen AI 360: LangChain",
|
36 |
"Gen AI 360: LLMs",
|
37 |
-
"
|
38 |
-
"
|
|
|
|
|
|
|
|
|
|
|
39 |
]
|
40 |
|
41 |
AVAILABLE_SOURCES = [
|
|
|
|
|
42 |
"towards_ai",
|
|
|
43 |
"hf_transformers",
|
44 |
"wikipedia",
|
45 |
-
"langchain_course",
|
46 |
-
"llm_course",
|
47 |
-
"activeloop",
|
48 |
"openai",
|
|
|
49 |
]
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
def log_likes(completion: Completion, like_data: gr.LikeData):
|
53 |
# make it a str so json-parsable
|
@@ -92,24 +94,16 @@ def format_sources(matched_documents: pd.DataFrame) -> str:
|
|
92 |
matched_documents.similarity_to_answer = (
|
93 |
matched_documents.similarity_to_answer * 100
|
94 |
)
|
95 |
-
|
96 |
-
# matched_documents["repetition"] = matched_documents.groupby("title")[
|
97 |
-
# "title"
|
98 |
-
# ].transform("size")
|
99 |
-
|
100 |
-
# drop duplicates, keep highest ranking ones
|
101 |
matched_documents = matched_documents.sort_values(
|
102 |
"similarity_to_answer", ascending=False
|
103 |
).drop_duplicates("title", keep="first")
|
104 |
|
105 |
-
# Revert back to correct display
|
106 |
display_source_to_ui = {
|
107 |
ui: src for ui, src in zip(AVAILABLE_SOURCES, AVAILABLE_SOURCES_UI)
|
108 |
}
|
109 |
matched_documents["source"] = matched_documents["source"].replace(
|
110 |
display_source_to_ui
|
111 |
)
|
112 |
-
|
113 |
documents = "\n".join(
|
114 |
[
|
115 |
document_template.format(document=document)
|
@@ -136,7 +130,7 @@ def user(user_input, history):
|
|
136 |
|
137 |
def get_empty_source_completion(user_input):
|
138 |
return Completion(
|
139 |
-
|
140 |
answer_text="You have to select at least one source from the dropdown menu.",
|
141 |
matched_documents=pd.DataFrame(),
|
142 |
error=False,
|
@@ -166,15 +160,7 @@ def get_answer(history, sources: Optional[list[str]] = None):
|
|
166 |
yield history, completion
|
167 |
|
168 |
|
169 |
-
# CSS = """
|
170 |
-
# .contain { display: flex; flex-direction: column; }
|
171 |
-
# .gradio-container { height: 100vh !important; }
|
172 |
-
# #component-0 { height: 100%; }
|
173 |
-
# #chatbot { flex-grow: 1; overflow: auto;}
|
174 |
-
# """
|
175 |
theme = gr.themes.Soft()
|
176 |
-
# theme.block_background_fill
|
177 |
-
# demo = gr.Blocks(theme=theme)
|
178 |
with gr.Blocks(
|
179 |
theme=gr.themes.Soft(
|
180 |
primary_hue="blue",
|
|
|
18 |
import cfg
|
19 |
from cfg import setup_buster
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
CONCURRENCY_COUNT = int(os.getenv("CONCURRENCY_COUNT", 64))
|
22 |
|
23 |
AVAILABLE_SOURCES_UI = [
|
|
|
|
|
|
|
|
|
24 |
"Gen AI 360: LLMs",
|
25 |
+
"Gen AI 360: LangChain",
|
26 |
+
"Towards AI Blog",
|
27 |
+
"Activeloop Docs",
|
28 |
+
"HF Transformers Docs",
|
29 |
+
"Wikipedia",
|
30 |
+
"OpenAI Docs",
|
31 |
+
"LangChain Docs",
|
32 |
]
|
33 |
|
34 |
AVAILABLE_SOURCES = [
|
35 |
+
"llm_course",
|
36 |
+
"langchain_course",
|
37 |
"towards_ai",
|
38 |
+
"activeloop",
|
39 |
"hf_transformers",
|
40 |
"wikipedia",
|
|
|
|
|
|
|
41 |
"openai",
|
42 |
+
"langchain_docs",
|
43 |
]
|
44 |
|
45 |
+
buster = setup_buster(cfg.buster_cfg)
|
46 |
+
|
47 |
+
# suppress httpx logs they are spammy and uninformative
|
48 |
+
logging.getLogger("httpx").setLevel(logging.WARNING)
|
49 |
+
|
50 |
+
logger = logging.getLogger(__name__)
|
51 |
+
logging.basicConfig(level=logging.INFO)
|
52 |
+
|
53 |
|
54 |
def log_likes(completion: Completion, like_data: gr.LikeData):
|
55 |
# make it a str so json-parsable
|
|
|
94 |
matched_documents.similarity_to_answer = (
|
95 |
matched_documents.similarity_to_answer * 100
|
96 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
matched_documents = matched_documents.sort_values(
|
98 |
"similarity_to_answer", ascending=False
|
99 |
).drop_duplicates("title", keep="first")
|
100 |
|
|
|
101 |
display_source_to_ui = {
|
102 |
ui: src for ui, src in zip(AVAILABLE_SOURCES, AVAILABLE_SOURCES_UI)
|
103 |
}
|
104 |
matched_documents["source"] = matched_documents["source"].replace(
|
105 |
display_source_to_ui
|
106 |
)
|
|
|
107 |
documents = "\n".join(
|
108 |
[
|
109 |
document_template.format(document=document)
|
|
|
130 |
|
131 |
def get_empty_source_completion(user_input):
|
132 |
return Completion(
|
133 |
+
user_inputs=user_input,
|
134 |
answer_text="You have to select at least one source from the dropdown menu.",
|
135 |
matched_documents=pd.DataFrame(),
|
136 |
error=False,
|
|
|
160 |
yield history, completion
|
161 |
|
162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
theme = gr.themes.Soft()
|
|
|
|
|
164 |
with gr.Blocks(
|
165 |
theme=gr.themes.Soft(
|
166 |
primary_hue="blue",
|
cfg.py
CHANGED
@@ -23,7 +23,7 @@ ACTIVELOOP_TOKEN = os.getenv("ACTIVELOOP_TOKEN")
|
|
23 |
if ACTIVELOOP_TOKEN is None:
|
24 |
logger.warning("No activeloop token found, you will not be able to fetch data.")
|
25 |
|
26 |
-
DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "ai-tutor-
|
27 |
DEEPLAKE_ORG = os.getenv("DEEPLAKE_ORG", "towards_ai")
|
28 |
|
29 |
# if you want to use a local dataset, set the env. variable, it overrides all others
|
|
|
23 |
if ACTIVELOOP_TOKEN is None:
|
24 |
logger.warning("No activeloop token found, you will not be able to fetch data.")
|
25 |
|
26 |
+
DEEPLAKE_DATASET = os.getenv("DEEPLAKE_DATASET", "ai-tutor-dataset_debug")
|
27 |
DEEPLAKE_ORG = os.getenv("DEEPLAKE_ORG", "towards_ai")
|
28 |
|
29 |
# if you want to use a local dataset, set the env. variable, it overrides all others
|
data/tmp.py
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
# import pandas as pd
|
2 |
-
|
3 |
-
# # Load the CSV
|
4 |
-
# df = pd.read_csv('data/wiki.csv')
|
5 |
-
|
6 |
-
|
7 |
-
# # Count the number of unique titles in the 'title' column
|
8 |
-
# unique_titles_count = df['title']
|
9 |
-
|
10 |
-
# print(len(df))
|
11 |
-
|
12 |
-
# # # Remove the 'ranking' column
|
13 |
-
# # df.drop('ranking', axis=1, inplace=True)
|
14 |
-
|
15 |
-
# # # Save the CSV again
|
16 |
-
# # df.to_csv('data/wiki.csv', index=False)
|
17 |
-
|
18 |
-
|
19 |
-
import gradio as gr
|
20 |
-
|
21 |
-
gr.themes.builder()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|