xangma
committed on
Commit
•
0f43d97
1
Parent(s):
b10b3aa
more fixes!
Browse files
app.py
CHANGED
@@ -172,6 +172,23 @@ with block:
|
|
172 |
with gr.Column(scale=2):
|
173 |
all_collections_to_get = gr.List(headers=['New Collections to make'],row_count=3, label='Collections_to_get', show_label=True, interactive=True, max_cols=1, max_rows=3)
|
174 |
make_collections_button = gr.Button(value="Make new collection(s)", variant="secondary").style(full_width=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
with gr.Column(scale=2):
|
176 |
collections_viewer = gr.CheckboxGroup(choices=[], label='Collections_viewer', show_label=True)
|
177 |
with gr.Column(scale=1):
|
@@ -193,7 +210,7 @@ with block:
|
|
193 |
message.submit(chat, inputs=[message, history_state, agent_state], outputs=[chatbot, history_state])
|
194 |
|
195 |
load_collections_button.click(merge_collections, inputs=[collections_viewer, vs_state], outputs=[vs_state])#.then(change_tab, None, tabs) #.then(set_chain_up, inputs=[openai_api_key_textbox, model_selector, k_textbox, max_tokens_textbox, vs_state, agent_state], outputs=[agent_state])
|
196 |
-
make_collections_button.click(ingest_docs, inputs=[all_collections_state, all_collections_to_get], outputs=[all_collections_state], show_progress=True).then(update_checkboxgroup, inputs = [all_collections_state], outputs = [collections_viewer])
|
197 |
delete_collections_button.click(delete_collection, inputs=[all_collections_state, collections_viewer], outputs=[all_collections_state, collections_viewer]).then(update_checkboxgroup, inputs = [all_collections_state], outputs = [collections_viewer])
|
198 |
delete_all_collections_button.click(delete_all_collections, inputs=[all_collections_state], outputs=[all_collections_state]).then(update_checkboxgroup, inputs = [all_collections_state], outputs = [collections_viewer])
|
199 |
get_all_collection_names_button.click(list_collections, inputs=[all_collections_state], outputs=[all_collections_state]).then(update_checkboxgroup, inputs = [all_collections_state], outputs = [collections_viewer])
|
|
|
172 |
with gr.Column(scale=2):
|
173 |
all_collections_to_get = gr.List(headers=['New Collections to make'],row_count=3, label='Collections_to_get', show_label=True, interactive=True, max_cols=1, max_rows=3)
|
174 |
make_collections_button = gr.Button(value="Make new collection(s)", variant="secondary").style(full_width=False)
|
175 |
+
with gr.Row():
|
176 |
+
chunk_size_textbox = gr.Textbox(
|
177 |
+
placeholder="Chunk size",
|
178 |
+
label="Chunk size",
|
179 |
+
show_label=True,
|
180 |
+
lines=1,
|
181 |
+
)
|
182 |
+
chunk_overlap_textbox = gr.Textbox(
|
183 |
+
placeholder="Chunk overlap",
|
184 |
+
label="Chunk overlap",
|
185 |
+
show_label=True,
|
186 |
+
lines=1,
|
187 |
+
)
|
188 |
+
chunk_size_textbox.value = "1000"
|
189 |
+
chunk_overlap_textbox.value = "1000"
|
190 |
+
with gr.Row():
|
191 |
+
gr.HTML('<center>See the <a href=https://python.langchain.com/en/latest/reference/modules/text_splitter.html>Langchain textsplitter docs</a></center>')
|
192 |
with gr.Column(scale=2):
|
193 |
collections_viewer = gr.CheckboxGroup(choices=[], label='Collections_viewer', show_label=True)
|
194 |
with gr.Column(scale=1):
|
|
|
210 |
message.submit(chat, inputs=[message, history_state, agent_state], outputs=[chatbot, history_state])
|
211 |
|
212 |
load_collections_button.click(merge_collections, inputs=[collections_viewer, vs_state], outputs=[vs_state])#.then(change_tab, None, tabs) #.then(set_chain_up, inputs=[openai_api_key_textbox, model_selector, k_textbox, max_tokens_textbox, vs_state, agent_state], outputs=[agent_state])
|
213 |
+
make_collections_button.click(ingest_docs, inputs=[all_collections_state, all_collections_to_get, chunk_size_textbox, chunk_overlap_textbox], outputs=[all_collections_state], show_progress=True).then(update_checkboxgroup, inputs = [all_collections_state], outputs = [collections_viewer])
|
214 |
delete_collections_button.click(delete_collection, inputs=[all_collections_state, collections_viewer], outputs=[all_collections_state, collections_viewer]).then(update_checkboxgroup, inputs = [all_collections_state], outputs = [collections_viewer])
|
215 |
delete_all_collections_button.click(delete_all_collections, inputs=[all_collections_state], outputs=[all_collections_state]).then(update_checkboxgroup, inputs = [all_collections_state], outputs = [collections_viewer])
|
216 |
get_all_collection_names_button.click(list_collections, inputs=[all_collections_state], outputs=[all_collections_state]).then(update_checkboxgroup, inputs = [all_collections_state], outputs = [collections_viewer])
|
ingest.py
CHANGED
@@ -69,7 +69,7 @@ def get_text(content):
|
|
69 |
else:
|
70 |
return ""
|
71 |
|
72 |
-
def ingest_docs(all_collections_state, urls):
|
73 |
"""Get documents from web pages."""
|
74 |
all_docs = []
|
75 |
|
@@ -77,9 +77,9 @@ def ingest_docs(all_collections_state, urls):
|
|
77 |
documents = []
|
78 |
shutil.rmtree('downloaded/', ignore_errors=True)
|
79 |
known_exts = ["py", "md"]
|
80 |
-
py_splitter = PythonCodeTextSplitter(chunk_size=
|
81 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=
|
82 |
-
md_splitter = MarkdownTextSplitter(chunk_size=
|
83 |
for url in urls:
|
84 |
paths_by_ext = {}
|
85 |
docs_by_ext = {}
|
@@ -131,6 +131,8 @@ def ingest_docs(all_collections_state, urls):
|
|
131 |
res = subprocess.run(["cp", "-r", (temp_path / folder).as_posix(), '/'.join(destination.split('/')[:-1])])
|
132 |
folder = destination
|
133 |
local_repo_path_1 = folder
|
|
|
|
|
134 |
for root, dirs, files in os.walk(local_repo_path_1):
|
135 |
for file in files:
|
136 |
file_path = os.path.join(root, file)
|
|
|
69 |
else:
|
70 |
return ""
|
71 |
|
72 |
+
def ingest_docs(all_collections_state, urls, chunk_size, chunk_overlap):
|
73 |
"""Get documents from web pages."""
|
74 |
all_docs = []
|
75 |
|
|
|
77 |
documents = []
|
78 |
shutil.rmtree('downloaded/', ignore_errors=True)
|
79 |
known_exts = ["py", "md"]
|
80 |
+
py_splitter = PythonCodeTextSplitter(chunk_size=int(chunk_size), chunk_overlap=int(chunk_overlap))
|
81 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=int(chunk_size), chunk_overlap=int(chunk_overlap))
|
82 |
+
md_splitter = MarkdownTextSplitter(chunk_size=int(chunk_size), chunk_overlap=int(chunk_overlap))
|
83 |
for url in urls:
|
84 |
paths_by_ext = {}
|
85 |
docs_by_ext = {}
|
|
|
131 |
res = subprocess.run(["cp", "-r", (temp_path / folder).as_posix(), '/'.join(destination.split('/')[:-1])])
|
132 |
folder = destination
|
133 |
local_repo_path_1 = folder
|
134 |
+
if local_repo_path_1 == '.':
|
135 |
+
local_repo_path_1 = os.getcwd()
|
136 |
for root, dirs, files in os.walk(local_repo_path_1):
|
137 |
for file in files:
|
138 |
file_path = os.path.join(root, file)
|